示例#1
0
    def _is_closed(self, board, position):
        """
        Check whether the position is srounded by Wood/Rigid.

        Parameters
        ----------
        board = np.array(obs['board'])

        position = tuple(obs['position'])
        """

        is_done = np.full(board.shape, False)
        is_done[position] = True
        to_search = [position]

        while to_search:
            x, y = to_search.pop()
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (x + dx, y + dy)
                if not self._on_board(new_position):
                    continue
                if is_done[new_position]:
                    continue
                is_done[new_position] = True
                if utility.position_is_agent(board, new_position):
                    return False
                if utility.position_is_wall(board, new_position):
                    continue
                if utility.position_is_fog(board, new_position):
                    continue
                to_search.append(new_position)

        return True
示例#2
0
def me_to_enemy_all_corridor(board, pos1, pos2):
    assert (pos1[0] == pos2[0] or pos1[1] == pos2[1])
    if pos1[0] == pos2[0]:
        if pos1[1] < pos2[1]:
            direction = constants.Action.Right
        else:
            direction = constants.Action.Left
    else:
        if pos1[0] < pos2[0]:
            direction = constants.Action.Down
        else:
            direction = constants.Action.Up
    p_dirs = perpendicular_directions(direction)
    pos2_next = utility.get_next_position(pos2, direction)
    next_is_impasse = (not utility.position_on_board(
        board, pos2_next)) or utility.position_is_wall(board, pos2_next)
    if utility.position_on_board(board, pos2_next) and utility.position_is_fog(
            board, pos2_next):
        next_is_impasse = False
    if not (position_is_in_corridor(board, pos2, p_dirs) and next_is_impasse):
        # pos2:enempy must be in impasse
        return False
    all_corridor_flag = True
    pos = utility.get_next_position(pos1, direction)
    while pos != pos2:
        if not (utility.position_is_passage(board, pos)):
            all_corridor_flag = False
            break
        if not position_is_in_corridor(board, pos, p_dirs):
            all_corridor_flag = False
            break
        pos = utility.get_next_position(pos, direction)
    return all_corridor_flag
示例#3
0
def position_is_passable(board, position, enemies):
    '''Determins if a possible can be passed'''
    return all([
        any([
            utility.position_is_agent(board, position),
            utility.position_is_powerup(board, position),
            utility.position_is_passage(board, position),
            utility.position_is_fog(board, position),
        ]), not utility.position_is_enemy(board, position, enemies)
    ])
示例#4
0
def _stop_condition(board, pos, exclude_agent=True):
    if not utility.position_on_board(board, pos):
        return True
    if utility.position_is_fog(board, pos):
        return True
    if utility.position_is_wall(board, pos):
        return True
    if not exclude_agent:
        if utility.position_is_agent(board, pos):
            return True
    return False
示例#5
0
def _all_bomb_real_life(board, bomb_life, bomb_blast_st):
    def get_bomb_real_life(bomb_position, bomb_real_life):
        """One bomb's real life is the minimum life of its adjacent bomb.
           Not that this could be chained, so please call it on each bomb mulitple times until
           converge
        """
        dirs = _all_directions(exclude_stop=True)
        min_life = bomb_real_life[bomb_position]
        for d in dirs:
            pos = bomb_position
            last_pos = bomb_position
            while True:
                pos = utility.get_next_position(pos, d)
                if _stop_condition(board, pos):
                    break
                if bomb_real_life[pos] > 0:
                    if bomb_real_life[pos] < min_life and \
                            _manhattan_distance(pos, last_pos) <= bomb_blast_st[pos] - 1:
                        min_life = bomb_real_life[pos]
                        last_pos = pos
                    else:
                        break
        return min_life

    bomb_real_life_map = np.copy(bomb_life)
    sz = len(board)
    while True:
        no_change = []
        for i in range(sz):
            for j in range(sz):
                if utility.position_is_wall(board, (i, j)) or utility.position_is_powerup(board, (i, j)) \
                        or utility.position_is_fog(board, (i, j)):
                    continue
                if bomb_life[i, j] < 0 + EPSILON:
                    continue
                real_life = get_bomb_real_life((i, j), bomb_real_life_map)
                no_change.append(bomb_real_life_map[i, j] == real_life)
                bomb_real_life_map[i, j] = real_life
        if all(no_change):
            break
    return bomb_real_life_map
    def _find_safe_directions(self, board, my_position, unsafe_directions,
                              bombs, enemies):
        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Helper function to do determine if the agents next move is possible.'''
            Q = queue.PriorityQueue()
            Q.put((0, next_position))
            seen = set()

            next_x, next_y = next_position
            is_stuck = True
            while not Q.empty():
                dist, position = Q.get()
                seen.add(position)

                position_x, position_y = position
                if next_x != position_x and next_y != position_y:
                    is_stuck = False
                    break

                if dist > bomb_range:
                    is_stuck = False
                    break

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + position_x, col + position_y)
                    if new_position in seen:
                        continue

                    if not utility.position_on_board(next_board, new_position):
                        continue

                    if not utility.position_is_passable(
                            next_board, new_position, enemies):
                        continue

                    dist = abs(row + position_x -
                               next_x) + abs(col + position_y - next_y)
                    Q.put((dist, new_position))
            return is_stuck

        # All directions are unsafe. Return a position that won't leave us locked.
        safe = []

        if len(unsafe_directions) == 4:
            next_board = board.copy()
            next_board[my_position] = constants.Item.Bomb.value

            for direction, bomb_range in unsafe_directions.items():
                next_position = utility.get_next_position(
                    my_position, direction)
                next_x, next_y = next_position
                if not utility.position_on_board(next_board, next_position) or \
                   not utility.position_is_passable(next_board, next_position, enemies):
                    continue

                if not is_stuck_direction(next_position, bomb_range,
                                          next_board, enemies):
                    # We found a direction that works. The .items provided
                    # a small bit of randomness. So let's go with this one.
                    return [direction]
            if not safe:
                safe = [constants.Action.Stop]
            return safe

        x, y = my_position
        disallowed = []  # The directions that will go off the board.

        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            position = (x + row, y + col)
            direction = utility.get_direction(my_position, position)

            # Don't include any direction that will go off of the board.
            if not utility.position_on_board(board, position):
                disallowed.append(direction)
                continue

            # Don't include any direction that we know is unsafe.
            if direction in unsafe_directions:
                continue

            if utility.position_is_passable(
                    board, position, enemies) or utility.position_is_fog(
                        board, position):
                safe.append(direction)

        if not safe:
            # We don't have any safe directions, so return something that is allowed.
            safe = [k for k in unsafe_directions if k not in disallowed]

        if not safe:
            # We don't have ANY directions. So return the stop choice.
            return [constants.Action.Stop]

        return safe
示例#7
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        enemy_mobility = 4
        enemy_bomb = 1

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position",
              my_position,
              "ammo",
              my_ammo,
              "blast",
              my_blast_strength,
              "kick",
              my_kick,
              end="\t")

        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        #np.set_printoptions(precision=2)
        #print("frac")
        #print(total_frac_blocked)

        # where to place bombs to break wood
        bomb_target_wood, n_breakable \
            = self._get_bomb_target(info["list_boards_no_move"][-1],
                                    my_position,
                                    my_blast_strength,
                                    constants.Item.Wood,
                                    max_breakable=False)

        #bomb_target_enemy = (total_frac_blocked > 0)
        #bomb_target = bomb_target_enemy + bomb_target_wood
        bomb_target = bomb_target_wood

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility,
            enemy_bomb=enemy_bomb,
            enemy_blast_strength=info["enemy_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        #print("boards")
        #for t, b in enumerate(list_boards):
        #    print(t)
        #    print(b[-3:,:])
        #    if t > 2:
        #        break

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])

        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable, _ = self._kickable_positions(obs,
                                                   info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])
                #print(list_boards_with_kick)
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [
                        len(s) for s in survivable_with_kick[1:]
                    ]
                    kick_actions.append(my_action)
        else:
            kickable = set()

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #print("survivable actions", survivable_actions)

        if len(survivable_actions) == 0:
            return None

        #
        # Items and bomb target that can be reached in a survivable manner
        #

        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target)

        #
        # Evaluate the survivability of each action
        #

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [
                    len(s) for s in survivable_with_bomb[1:]
                ]
                continue

            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            n_survivable[action], _info = self._count_survivable(
                succ, 1, next_position)

        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Avoid the action leading to no choice if possible
        #
        updated = False

        max_survivable_positions = max([n[-1] for n in n_survivable.values()])
        if max_survivable_positions > 1:
            for a in n_survivable:
                if n_survivable[a][-1] > max_survivable_positions / 2:
                    continue
                is_survivable[a] = False
                updated = True

        minn = defaultdict(int)
        for a in n_survivable:
            minn[a] = min(n_survivable[a][enemy_mobility:])
        maxmin = max(minn.values())
        if maxmin > 1:
            for a in minn:
                if minn[a] == 1:
                    is_survivable[a] = False
                    updated = True

        if updated:
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value
        """
        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value
        """

        #
        # Bomb at a target
        #

        best_action = None
        max_block = 0
        for action in survivable_actions:
            next_position = self._get_next_position(my_position, action)
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        if all([
                is_survivable[constants.Action.Bomb], best_action
                in [constants.Action.Stop, constants.Action.Bomb]
        ]):
            print("Place a bomb at a locally optimal position",
                  constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = self._get_next_position(my_position, best_action)
            # TODO : PARAMETER TO OPTIMIZE
            if total_frac_blocked[next_position] > 0.1:
                print("Move towards better place to bomb", best_action)
                return best_action.value

        #
        # Bomb to break wood
        #

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False
        elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                       my_position, my_blast_strength,
                                       info["might_powerup"]):
            # if might break an item, do not bomb
            consider_bomb = False

        if consider_bomb and bomb_target[my_position]:
            # place bomb if I am at a bomb target
            print("Bomb at a bomb target", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards good items
        #

        good_items = [
            constants.Item.ExtraBomb, constants.Item.IncrRange,
            constants.Item.Kick
        ]
        good_time_positions = set()  # positions with good items
        for item in good_items:
            good_time_positions = good_time_positions.union(
                reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions, prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        #
        # Move towards where to bomb to break wood
        #

        good_time_positions = reachable_items["target"]
        print("good time positions", good_time_positions)
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions, prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break a wall, enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position, my_blast_strength,
                                   [constants.Item.Wood] + my_enemies):
                    continue

            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                rows, cols = np.where(board == enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                n_survivable_nodes_with_kick = sum(
                    [len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->",
                          n_survivable_nodes_with_kick, my_action)
                    return my_action.value

        #
        # TODO : move toward might powerups
        #

        #
        # Move towards a fog where we have not seen longest
        #

        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x + dx, y + dy)
                         for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            age = max([
                info["since_last_seen"][position] for position in neighbors
                if self._on_board(position)
            ])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position], prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value

        #
        # Choose most survivable action
        #

        action = self._get_most_survivable_action(n_survivable)
        print("Most survivable action", action)
        return action.value
示例#8
0
    def _find_safe_directions(self, board, my_position, unsafe_directions,
                              bombs, enemies, item):
        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Helper function to do determine if the agents next move is possible.'''
            Q = queue.PriorityQueue()
            Q.put((0, next_position))
            seen = set()

            next_x, next_y = next_position
            is_stuck = True
            while not Q.empty():
                dist, position = Q.get()
                seen.add(position)

                #FIXME is_stuck=False
                position_x, position_y = position
                if next_x != position_x and next_y != position_y:
                    is_stuck = False
                    break

                if dist > bomb_range:
                    is_stuck = False
                    break

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + position_x, col + position_y)
                    if new_position in seen:
                        continue

                    if not utility.position_on_board(next_board, new_position):
                        continue

                    if not utility.position_is_passable(
                            next_board, new_position, enemies):
                        continue

                    dist = abs(row + position_x -
                               next_x) + abs(col + position_y - next_y)
                    Q.put((dist, new_position))
            return is_stuck

        # All directions are unsafe. Return a position that won't leave us locked.
        safe = []

        if len(unsafe_directions) == 4:
            next_board = board.copy()
            next_board[my_position] = constants.Item.Bomb.value
            disallowed = []
            for direction, bomb_range in unsafe_directions.items():
                next_position = utility.get_next_position(
                    my_position, direction)
                next_x, next_y = next_position
                if not utility.position_on_board(next_board, next_position) or \
                   not utility.position_is_passable(next_board, next_position, enemies):
                    disallowed.append(direction)
                    continue

                if not is_stuck_direction(next_position, bomb_range,
                                          next_board, enemies):
                    # We found a direction that works. The .items provided
                    # a small bit of randomness. So let's go with this one.
                    return [direction]
            if not safe:

                #当决定不动之前,判断是否是原地放炸弹,如果是原地放炸弹那么从unsafe_directions中随机一个
                # for i in bombs:
                # if len(bombs) == 1 :
                if len(item[constants.Item(3)]) == 1:
                    # if my_position == i['position']:
                    for bomb in bombs:
                        if my_position == bomb['position']:
                            safe = [
                                k for k in unsafe_directions
                                if k not in disallowed
                            ]
                        # break
            if not safe:
                safe = [constants.Action.Stop]
            return safe

        x, y = my_position
        disallowed = []  # The directions that will go off the board.

        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            position = (x + row, y + col)
            direction = utility.get_direction(my_position, position)

            # Don't include any direction that will go off of the board.
            if not utility.position_on_board(board, position):
                disallowed.append(direction)
                continue

            # Don't include any direction that we know is unsafe.
            if direction in unsafe_directions:

                #当这个不安全位置不能通过的时候就disallow,防止踢炸弹
                if not utility.position_is_passable(board, position, enemies):
                    disallowed.append(direction)

                #当往不安全方向走,正好被炸死的话,那么就不能走。(刚好被炸死需要通过life来制定)
                # if

                continue

            if utility.position_is_passable(
                    board, position, enemies) or utility.position_is_fog(
                        board, position):
                #可能移动一个位置,隔壁存在炸弹
                safe.append(direction)
                for bomb in bombs:
                    if bomb['bomb_life'] == 1:
                        bomb_x, bomb_y = bomb['position']
                        if bomb_x == position[0] and abs(
                                bomb_y -
                                position[1]) <= bomb['blast_strength']:
                            #remove the direction
                            safe.pop()
                            break
                        elif bomb_y == position[1] and abs(
                                bomb_x -
                                position[0]) <= bomb['blast_strength']:
                            safe.pop()
                            break
        if not safe:
            # We don't have any safe directions, so return something that is allowed.
            safe = [k for k in unsafe_directions if k not in disallowed]

        if not safe:
            # We don't have ANY directions. So return the stop choice.
            return [constants.Action.Stop]

        return safe
示例#9
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = obs['board']
        recently_seen_positions = (info["since_last_seen"] < 3)
        board[recently_seen_positions] = info["last_seen"][recently_seen_positions]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        if verbose:
            print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

        my_next_position = {constants.Action.Stop: my_position,
                            constants.Action.Bomb: my_position}
        for action in [constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = self._get_next_position(my_position, action)
            if self._on_board(next_position):
                if board[next_position] == constants.Item.Rigid.value:
                    my_next_position[action] = None
                else:
                    my_next_position[action] = next_position
            else:
                my_next_position[action] = None

        #
        # Understand current situation
        #

        if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        
        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board==my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])
        
        # Positions where we kick a bomb if we move to
        if my_kick:
            kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()
            might_kickable = set()

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable)

        # enemy positions over time
        # these might be dissappeared due to extra flames
        if len(enemy_positions):
            rows = [p[0] for p in enemy_positions]
            cols = [p[1] for p in enemy_positions]
            list_enemy_positions = [(rows, cols)]
            _enemy_positions = list()
            for t in range(self._enemy_mobility):
                rows, cols = list_enemy_positions[-1]
                for x, y in zip(rows, cols):
                    for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                        next_position = (x + dx, y + dy)
                        if not self._on_board(next_position):
                            continue
                        _board = info["list_boards_no_move"][t]
                        if utility.position_is_passage(_board, next_position):
                            _enemy_positions.append(next_position)
            _enemy_positions = set(_enemy_positions)
            rows = [p[0] for p in _enemy_positions]
            cols = [p[1] for p in _enemy_positions]
            list_enemy_positions.append((rows, cols))
        else:
            list_enemy_positions = []
            
        
        # survivable actions
        is_survivable = dict()
        for a in self._get_all_actions():
            is_survivable[a] = False
        n_survivable = dict()
        list_boards = dict()
        for my_action in self._get_all_actions():

            next_position = my_next_position[my_action]

            if next_position is None:
                continue

            if my_action == constants.Action.Bomb:
                if any([my_ammo == 0,
                        obs["bomb_blast_strength"][next_position] > 0]):
                    continue
            
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1]):
                continue

            if all([my_action != constants.Action.Stop,
                    obs["bomb_blast_strength"][next_position] > 0,
                    next_position not in set.union(kickable, might_kickable)]):
                continue

            if next_position in set.union(kickable, might_kickable):
                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue
            
            # list of boards from next steps
            list_boards[my_action], _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=my_action,
                                       can_kick=my_kick,
                                       enemy_mobility=self._enemy_mobility,
                                       enemy_bomb=self._enemy_bomb,
                                       agent_blast_strength=info["agent_blast_strength"])

            # agents might be disappeared, because of overestimated bombs
            for t, positions in enumerate(list_enemy_positions):
                list_boards[my_action][t][positions] = constants.Item.AgentDummy.value
            
            # some bombs may explode with extra bombs, leading to under estimation
            for t in range(len(list_boards[my_action])):
                flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
                list_boards[my_action][t][flame_positions] = constants.Item.Flames.value
                
        """
        processed = Parallel(n_jobs=-1, verbose=0)(
            [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action)
             for action in list_boards]
        )
        for survivable, my_action in processed:
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        """
        
        for my_action in list_boards:
            survivable = search_time_expanded_network(list_boards[my_action][1:],
                                                      my_next_position[my_action])
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        
        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if might_blocked[a] and not is_survivable[constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #
                
        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #
        
            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #
            
            action = survivable_actions[0]
            if verbose:
                print("The only survivable action", action)
            return action.value


        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
    
        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            """
            np.set_printoptions(precision=3)
            print("enemy")
            print(total_frac_blocked)
            print("teammate")
            print(total_frac_blocked_teammate)
            print("product")
            prod = total_frac_blocked * (1 - total_frac_blocked_teammate)
            print(prod[:5,:5])
            """

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [constants.Action.Stop,
                       constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = my_next_position[action]
            if next_position is None:
                continue
            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop]
                if teammate_position is not None:
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp
                block[action] -=  np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([might_blocked[action],
                    not is_survivable[constants.Action.Stop]]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp            
            block[action] -=  np.log(-np.log(self.random.uniform()))
            if might_blocked[action]:
                block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                                 + total_frac_blocked[next_position] * p_survivable[action]) / 2
                if teammate_position is not None:                    
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp                
                block[action] -=  np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_bomb,
                                                           enemy_position)
                n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable])

            n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies])
            n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([obs["bomb_life"][next_position] > 0,
                            board[following_position] > constants.Item.AgentDummy.value]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position)
                n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable])

                n_with_bomb = n_survivable_nodes_with_bomb_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_with_bomb_teammate = 0
                else:
                    total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_with_bomb_teammate)
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        for next_position in kickable:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True)

            n_survivable_nodes_with_kick = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_kick,
                                                           enemy_position)
                n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable])

                n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies])
                n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
                if n_with_none == 0:
                    total_frac_blocked[next_position] = 0
                else:
                    total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position)
                n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable])

                n_with_kick = n_survivable_nodes_with_kick_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_teammate[next_position] = 0
                else:
                    total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none
            
            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if teammate_position is not None:
                teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_with_bomb_teammate < self._interfere_threshold,
                        total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:                    
                    if verbose:
                        print("Bomb is best", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    if verbose:
                        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = my_next_position[best_action]

            should_chase = (total_frac_blocked[next_position] > self._chase_threshold)

            if teammate_position is not None:
                teammate_safety = total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]]):
                    action = constants.Action.Stop
                    if verbose:
                        print("Wait flames life", action)
                    return action.value
                else:
                    if verbose:
                        print("Move towards better place to bomb", best_action)
                    return best_action.value                

        # Exclude the action representing stop to wait
        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action
                
        #
        # Do not take risky actions
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                print("RISKY", action)
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            if verbose:
                print("Take the best action in danger", best_action)
            return best_action.value
        else:
            # Take the most survivable action
            if verbose:
                print("Take the most survivable action", most_survivable_action)
            return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([teammate_position is not None,
                len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    frac = total_frac_blocked_with_bomb_teammate
                else:
                    next_position = my_next_position[action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    print("INTERFERE", action)
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                if verbose:
                    print("Take the best action in intereference", best_action)
                return best_action.value
            else:
                if verbose:
                    print("Take the least interfering action", least_interfere_action)
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=self._enemy_mobility,
                                              enemy_bomb=self._enemy_bomb,
                                              agent_blast_strength=info["agent_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value
        
        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(list_boards,
                                                                    my_position)        
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        #
        # If I have seen an enemy recently and cannot see him now, them move to the last seen position
        #

        action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info,
                                       my_enemies)
        if action is not None:
            if verbose:
                print("Moving toward last seen enemy", action)
            return action.value            
        
        #
        # If I have seen a teammate recently, them move away from the last seen position
        #

        action = self._action_away_from_teammate(my_position,
                                                 next_to_items[constants.Item.Fog],
                                                 prev,
                                                 is_survivable,
                                                 info,
                                                 my_teammate)
        if action is not None:
            if verbose:
                print("Moving away from last seen teammate", action)
            return action.value            
        
        #
        # Move towards a fog where we have not seen longest
        #
        
        action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                if verbose:
                    print("Moving toward oldest fog", action)
                return action.value            

        #
        # Choose most survivable action
        #

        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if verbose:
            print("Take the best action among safe actions (nothing else to do)", best_action)

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
示例#10
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(info["list_boards_no_move"],
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=0)

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable = self._kickable_positions(obs, info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=0)
                #print(list_boards_with_kick)
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    survivable_actions.append(my_action)
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [
                        len(s) for s in survivable_with_kick[1:]
                    ]
                    kick_actions.append(my_action)
        else:
            kickable = set()

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [
                    len(s) for s in survivable_with_bomb[1:]
                ]
                continue

            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            n_survivable[action], _info = self._count_survivable(
                succ, 1, next_position)

        most_survivable_action = None
        if survivable_actions:
            survivable_score = dict()
            for action in n_survivable:
                #survivable_score[action] = sum([-n**(-5) for n in n_survivable[action]])
                survivable_score[action] = sum(
                    [n for n in n_survivable[action]])
                if verbose:
                    print(action, survivable_score[action],
                          n_survivable[action])
            best_survivable_score = max(survivable_score.values())

            random.shuffle(survivable_actions)
            for action in survivable_actions:
                if survivable_score[action] == best_survivable_score:
                    most_survivable_action = action
                    break

        if most_survivable_action is not None:
            print("Most survivable action", most_survivable_action)
            return most_survivable_action.value

        # kick if possible
        if my_kick:
            kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()
        print("Kickable", my_kick, kickable)
        while kickable:
            next_position = kickable.pop()
            action = self._get_direction(my_position, next_position)
            print("Must kick to survive", action)
            return action.value

        # move towards a teammate if she is blocking
        for action in [
                constants.Action.Right, constants.Action.Left,
                constants.Action.Down, constants.Action.Up
        ]:
            next_position = utility.get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility._position_is_item(board, next_position, my_teammate):
                print("Must move to teammate to survive", action)
                return action.value

        # move towards an enemy
        for action in [
                constants.Action.Right, constants.Action.Left,
                constants.Action.Down, constants.Action.Up
        ]:
            next_position = utility.get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_enemy(board, next_position, my_enemies):
                print("Must move to enemy to survive", action)
                return action.value

        # move towards anywhere besides ridid
        for action in [
                constants.Action.Right, constants.Action.Left,
                constants.Action.Down, constants.Action.Up
        ]:
            next_position = utility.get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_rigid(board, next_position):
                continue
            if utility.position_is_wood(board, next_position):
                continue
            if utility.position_is_bomb(info["curr_bombs"], next_position):
                continue
            print("Try moving to survive", action)
            return action.value

        action = constants.Action.Stop
        print("Must die", action)
        return action
示例#11
0
def get_intermediate_rewards(prev_observations, cur_observations,
                             position_queue):
    # Note: only for team env
    r = [0.0, 0.0, 0.0, 0.0]
    for i in range(4):
        prev_alive = prev_observations[i]['alive']
        prev_n_enemy = 0
        for e in prev_observations[i]['enemies']:
            if e.value in prev_alive:
                prev_n_enemy += 1

        prev_n_teammate = 1 if prev_observations[i][
            'teammate'].value in prev_alive else 0
        prev_can_kick = prev_observations[i]['can_kick']
        prev_n_ammo = prev_observations[i]['ammo']
        prev_n_blast = prev_observations[i]['blast_strength']
        prev_position = prev_observations[i]['position']
        prev_wood_positions = list(
            zip(*np.where(
                prev_observations[i]['board'] == constants.Item.Wood.value)))

        cur_alive = cur_observations[i]['alive']
        cur_n_enemy = 0
        for e in cur_observations[i]['enemies']:
            if e.value in cur_alive:
                cur_n_enemy += 1

        cur_n_teammate = 1 if cur_observations[i][
            'teammate'].value in cur_alive else 0
        cur_can_kick = cur_observations[i]['can_kick']
        cur_n_ammo = cur_observations[i]['ammo']
        cur_n_blast = cur_observations[i]['blast_strength']
        cur_position = cur_observations[i]['position']

        if prev_n_enemy - cur_n_enemy > 0:
            r[i] += (prev_n_enemy - cur_n_enemy) * 0.5
        if prev_n_teammate - cur_n_teammate > 0:
            r[i] -= (prev_n_teammate - cur_n_teammate) * 0.5
        if not prev_can_kick and cur_can_kick:
            r[i] += 0.02
        if cur_n_ammo - prev_n_ammo > 0:
            r[i] += 0.00
        if cur_n_blast - prev_n_blast > 0:
            r[i] += 0.00
        if cur_position not in position_queue:
            r[i] += 0.000
            position_queue.append(cur_position)
        for row, col in prev_wood_positions:
            cur_board = cur_observations[i]['board']
            if not utility.position_is_wall(
                    cur_board, (row, col)) and not utility.position_is_fog(
                        cur_board, (row, col)):
                r[i] += 0.000

    #0 2 teammates, 1 3 teammates
    team_spirit = 0.2
    r0 = r[0] * (1 - team_spirit) + team_spirit * r[2]
    r1 = r[1] * (1 - team_spirit) + team_spirit * r[3]
    r2 = r[2] * (1 - team_spirit) + team_spirit * r[0]
    r3 = r[3] * (1 - team_spirit) + team_spirit * r[1]

    mean1 = (r0 + r2) / 2.0
    mean2 = (r1 + r3) / 2.0
    #make sure it is zero-sum
    r = [r0 - mean2, r1 - mean1, r2 - mean2, r3 - mean1]
    #print(r)
    return r
示例#12
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        self._search_range = 10

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position", my_position, end="\t")

        #
        # Understand current situation
        #

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable_no_move, prev_no_move \
            = self._search_time_expanded_network(info["list_boards_no_move"],
                                                 my_position)

        # Items that can be reached in a survivable manner
        reachable_items_no_move, reached_no_move, next_to_items_no_move \
            = self._find_reachable_items(info["list_boards_no_move"],
                                         my_position,
                                         survivable_no_move)

        # Simulation assuming enemies move

        for enemy_mobility in range(3, -1, -1):
            # List of boards simulated
            list_boards, _ = self._board_sequence(
                board,
                info["curr_bombs"],
                info["curr_flames"],
                self._search_range,
                my_position,
                enemy_mobility=enemy_mobility)

            # List of the set of survivable time-positions at each time
            # and preceding positions
            survivable, prev = self._search_time_expanded_network(
                list_boards, my_position)

            if len(survivable[1]) > 0:
                # Gradually reduce the mobility of enemy, so we have at least one survivable action
                break

        # Items that can be reached in a survivable manner
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"])

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        # if verbose:
        if True:
            print("survivable actions are", survivable_actions)

        # Positions where we kick a bomb if we move to
        if my_kick:
            kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()

        print()
        for t in range(0):
            print(list_boards[t])
            print(survivable[t])
            for key in prev[t]:
                print(key, prev[t][key])

        #
        # Choose an action
        #
        """
        # This is not effective in the current form
        if len(survivable_actions) > 1:
            # avoid the position if only one position at the following step
            # the number of positions that can be reached from the next position
            next = defaultdict(set)
            next_count = defaultdict(int)
            for position in survivable[1]:
                next[position] = set([p for p in prev[2] if position in prev[2][p]])
                next_count[position] = len(next[position])
            print("next count", next_count)
            if max(next_count.values()) > 1:
                for position in survivable[1]:
                    if next_count[position] == 1:
                        risky_action = self._get_direction(my_position, position)
                        is_survivable[risky_action] = False
                survivable_actions = [a for a in is_survivable if is_survivable[a]]                
        """

        # Do not stay on a bomb if I can
        if all([
                obs["bomb_life"][my_position] > 0,
                len(survivable_actions) > 1,
                is_survivable[constants.Action.Stop]
        ]):
            is_survivable[constants.Action.Stop] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        if len(survivable_actions) == 0:

            print("Must die")
            return None

        elif len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        # TODO : shoud check the survivability of all agents in one method

        # Place a bomb if
        # - it does not significantly reduce my survivability
        # - it can reduce the survivability of enemies

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 2 for s in survivable_with_bomb[1:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False

        if consider_bomb:
            # place bomb if can reach fog/enemy
            if self._can_break(info["list_boards_no_move"][-1], my_position,
                               my_blast_strength,
                               [constants.Item.Fog] + my_enemies):
                print("Bomb to break fog/enemy", constants.Action.Bomb)
                print(info["list_boards_no_move"][-1])
                return constants.Action.Bomb.value

            for enemy in my_enemies:
                # check if the enemy is reachable
                if len(reachable_items_no_move[enemy]) == 0:
                    continue

                # can reach the enemy at enemy_position in enemy_time step
                enemy_time = reachable_items_no_move[enemy][0][0]
                enemy_position = reachable_items_no_move[enemy][0][1:3]

                # check if placing a bomb can reduce the survivability
                # of the enemy
                survivable_before, _ = self._search_time_expanded_network(
                    info["list_boards_no_move"], enemy_position)

                board_with_bomb = deepcopy(obs["board"])
                curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
                # lay a bomb
                board_with_bomb[my_position] = constants.Item.Bomb.value
                bomb = characters.Bomb(
                    characters.Bomber(),  # dummy owner of the bomb
                    my_position,
                    constants.DEFAULT_BOMB_LIFE,
                    my_blast_strength,
                    None)
                curr_bombs_with_bomb.append(bomb)
                list_boards_with_bomb, _ \
                    = self._board_sequence(board_with_bomb,
                                           curr_bombs_with_bomb,
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           enemy_mobility=0)
                survivable_after, _ \
                    = self._search_time_expanded_network(list_boards_with_bomb,
                                                         enemy_position)

                good_before = np.array([len(s) for s in survivable_before])
                good_after = np.array([len(s) for s in survivable_after])

                # TODO : what are good criteria?
                if any(good_after < good_before):
                    # place a bomb if it makes sense
                    print("Bomb to kill an enemy", constants.Action.Bomb)
                    print("before", good_before)
                    print("after ", good_after)
                    print([len(s) for s in survivable])
                    print([len(s) for s in survivable_with_bomb])
                    return constants.Action.Bomb.value
                """
                # find direction towards enemy
                positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                for t in range(enemy_time, 1, -1):
                    _positions = set()
                    for position in positions:
                        _positions = _positions.union(prev_no_move[t][position])
                    positions = _positions.copy()

                if enemy_time <= my_blast_strength:
                    #if True:
                    positions.add(my_position)
                    positions_after_bomb = set(survivable[1]).difference(positions)
                    if positions_after_bomb:
                        print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                        return constants.Action.Bomb.value
                """

            # if I can kick, consider placing a bomb to kick
            if my_kick and my_position in survivable_with_bomb[3]:
                # consdier a sequence of actions: place bomb -> move (action) -> move back (kick)
                for action in [
                        constants.Action.Up, constants.Action.Down,
                        constants.Action.Left, constants.Action.Right
                ]:
                    if not is_survivable[action]:
                        continue

                    if action == constants.Action.Up:
                        # kick direction is down
                        dx = 1
                        dy = 0
                    elif action == constants.Action.Down:
                        # kick direction is up
                        dx = -1
                        dy = 0
                    elif action == constants.Action.Left:
                        # kick direction is right
                        dx = 0
                        dy = 1
                    elif action == constants.Action.Right:
                        # kick direction is left
                        dx = 0
                        dy = -1
                    else:
                        raise ValueError()

                    _next_position = (my_position[0] + dx, my_position[1] + dy)
                    if not self._on_board(_next_position):
                        continue
                    else:
                        next_position = _next_position

                    # Find where the bomb stops if kicked
                    for t in range(int(obs["bomb_life"][my_position]) - 2):
                        if not utility.position_is_passage(
                                board, next_position):
                            break
                        _next_position = (next_position[0] + dx,
                                          next_position[1] + dy)
                        if not self._on_board(_next_position):
                            break
                        else:
                            next_position = _next_position
                        if utility.position_is_fog(board, next_position):
                            print("Bomb to kick into fog", action)
                            return constants.Action.Bomb.value
                        elif utility.position_is_enemy(list_boards[t + 2],
                                                       next_position,
                                                       my_enemies):
                            print("Bomb to kick towards enemy", action)
                            return constants.Action.Bomb.value
                """
                x0, y0 = my_position
                positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                positions_after_bomb = set(survivable[1]).intersection(positions_against)

                if positions_after_bomb:
                    print("Bomb to kick", enemy, constants.Action.Bomb)
                    return constants.Action.Bomb.value
                """

        # kick
        if len(kickable) > 0:

            while kickable:
                # then consider what happens if I kick a bomb
                next_position = kickable.pop()

                # do not kick a bomb if it will break enemies
                if info["moving_direction"][next_position] is None:
                    # if it is a static bomb
                    if self._can_break(info["list_boards_no_move"][0],
                                       next_position, my_blast_strength,
                                       my_enemies):
                        continue

                my_action = self._get_direction(my_position, next_position)
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=3)
                survivable_with_kick, prev_kick \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    print("Kicking", my_action)
                    return my_action.value

        # if on a bomb, consider where to kick in the following step
        if obs["bomb_life"][my_position] > 0:
            # For each survivable move in the next step,
            # check what happens if we kick in the following step.
            # If the bomb is kicked into a fog, plan to kick.
            # If the bomb is kicked toward an enemy, plan to kick.
            # Otherwise, do not plan to kick.
            for action in [
                    constants.Action.Up, constants.Action.Down,
                    constants.Action.Left, constants.Action.Right
            ]:
                if not is_survivable[action]:
                    continue

                if action == constants.Action.Up:
                    # kick direction is down
                    dx = 1
                    dy = 0
                elif action == constants.Action.Down:
                    # kick direction is up
                    dx = -1
                    dy = 0
                elif action == constants.Action.Left:
                    # kick direction is right
                    dx = 0
                    dy = 1
                elif action == constants.Action.Right:
                    # kick direction is left
                    dx = 0
                    dy = -1
                else:
                    raise ValueError()

                _next_position = (my_position[0] + dx, my_position[1] + dy)
                if not self._on_board(_next_position):
                    continue
                else:
                    next_position = _next_position

                # Find where the bomb stops if kicked
                for t in range(int(obs["bomb_life"][my_position]) - 1):
                    if not utility.position_is_passage(board, next_position):
                        break
                    _next_position = (next_position[0] + dx,
                                      next_position[1] + dy)
                    if not self._on_board(_next_position):
                        break
                    else:
                        next_position = _next_position
                if utility.position_is_fog(board, next_position):
                    print("Moving to kick into fog", action)
                    return action.value
                elif utility.position_is_enemy(list_boards[t + 2],
                                               next_position, my_enemies):
                    print("Moving to kick towards enemy", action)

        # Move towards an enemy
        good_time_positions = set()
        for enemy in my_enemies:
            good_time_positions = good_time_positions.union(
                next_to_items[enemy])

        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions, prev,
                                                   is_survivable)

            if action is not None:
                print("Moving toward enemy", action)
                return action.value

        #
        # Random action
        #
        action = random.choice(survivable_actions)
        print("Random action", action)
        return action.value
示例#13
0
    def _find_reachable_items(self,
                              list_boards,
                              my_position,
                              time_positions,
                              bomb_target=None):
        """
        Find items reachable from my position

        Parameters
        ----------
        list_boards : list
            list of boards, generated by _board_sequence
        my_position : tuple
            my position, where the search starts
        time_positions : list
            survivable time-positions, generated by _search_time_expanded_network

        Return
        ------
        items : dict
            items[item] : list of time-positions from which one can reach item
        reached : array
            minimum time to reach each position on the board
        next_to_items : dict
            next_to_items[item] : list of time-positions from which one can reach
                                  the position next to item
        """

        if bomb_target is None:
            bomb_target = np.full(self.board_shape, False)

        # items found on time_positions and the boundary (for Wood)
        items = defaultdict(list)

        # reached[position] : minimum time to reach the position
        reached = np.full(self.board_shape, np.inf)

        # whether already checked the position
        _checked = np.full(self.board_shape, False)

        # positions next to wood or other agents (count twice if next to two woods)
        next_to_items = defaultdict(list)

        for t, positions in enumerate(time_positions):
            # check the positions reached at time t
            board = list_boards[t]
            for position in positions:
                if reached[position] < np.inf:
                    continue
                reached[position] = t
                item = constants.Item(board[position])
                items[item].append((t, ) + position)
                if bomb_target[position]:
                    items["target"].append((t, ) + position)
                _checked[position] = True
                x, y = position
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + row, y + col)
                    if not self._on_board(next_position):
                        continue
                    if _checked[next_position]:
                        continue
                    _checked[next_position] = True
                    if any([
                            utility.position_is_agent(board, next_position),
                            utility.position_is_fog(board, next_position)
                    ]):
                        item = constants.Item(board[next_position])
                        items[item].append((t, ) + next_position)
                        next_to_items[item].append((t, ) + position)
                    # ignoring wall that will not exist when explode
                    if utility.position_is_wood(list_boards[-1],
                                                next_position):
                        item = constants.Item(board[next_position])
                        items[item].append((t, ) + next_position)
                        next_to_items[item].append((t, ) + position)

        return items, reached, next_to_items
示例#14
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        enemy_mobility = 4

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")
        
        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value
        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
        bomb_target_enemy = (total_frac_blocked > 0)
        
        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=enemy_mobility)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility)

        survivable_actions = [a for a in is_survivable if is_survivable[a]] 

        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable = self._kickable_positions(obs, info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility)
                #print(list_boards_with_kick)
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    survivable_actions.append(my_action)
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [len(s) for s in survivable_with_kick[1:]]
                    kick_actions.append(my_action)
        else:
            kickable = set()
        
        if len(survivable_actions) == 0:
            return None

        #
        # bomb target that can be reached in a survivable manner
        #

        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target_enemy)

        #
        # Evaluate the survivability of each action
        #

        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [len(s) for s in survivable_with_bomb[1:]]
                continue
            
            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0 
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            n_survivable[action], _info = self._count_survivable(succ, 1, next_position)

        #if True:
        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value

        #
        # Bomb at a target
        #
            
        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
            # if not sufficiently survivable all the time after bomb, do not bomb
            consider_bomb = False
            
        #
        # Place a bomb
        #

        best_action = None
        max_block = 0
        for action in survivable_actions:
            if action == constants.Action.Stop:
                continue
            next_position = self._get_next_position(my_position, action)
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        if consider_bomb and best_action == constants.Action.Stop:
            print("Place a bomb at a locally optimal position", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #
        
        if best_action not in [None, constants.Action.Stop]:
            print("Move towards better place to bomb", best_action)
            return best_action.value
        
        good_time_positions = reachable_items["target"]
        if good_time_positions:
            score = [total_frac_blocked[(x,y)] / (t+1) for t, x, y in good_time_positions]
            argmax = np.argwhere(score==np.max(score))
            best_time_positions = [good_time_positions[i[0]] for i in argmax]
            action = self._find_distance_minimizer(my_position,
                                                   best_time_positions,
                                                   #good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward where to bomb", action)
                return action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb                    
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position,
                                   my_blast_strength,
                                   my_enemies):
                    continue

            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                n_survivable_nodes_with_kick = sum([len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->", n_survivable_nodes_with_kick,
                          my_action)
                    return my_action.value

        #
        # Move towards a fog where we have not seen longest
        #

        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x+dx, y+dy) for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            age = max([info["since_last_seen"][position] for position in neighbors if self._on_board(position)])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position],
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value            

        #
        # Choose most survivable action
        #

        survivable_score = dict()
        for action in n_survivable:
            # survivable_score[action] = sum([-n**(-5) for n in n_survivable[action]])
            survivable_score[action] = sum([n for n in n_survivable[action]])
            if verbose:
                print(action, survivable_score[action], n_survivable[action])                
        best_survivable_score = max(survivable_score.values())

        most_survivable_action = None
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break
         
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value