def _what_to_break(cls, board, my_position, blast_strength):
    """
    List the items that a blast centred on my_position would hit

    The scan walks outwards from my_position along the four axis
    directions, visiting offsets 1 .. blast_strength - 1. A direction is
    abandoned at the board edge or at a rigid cell. The first wood or
    agent cell met in a direction is recorded and also ends that scan.

    Parameters
    ----------
    cls :
        not used by this function
    board : array
        board, indexed as board[(x, y)]
    my_position : tuple
        (x, y) position where the blast starts
    blast_strength : int
        offsets up to blast_strength - 1 are scanned in each direction

    Return
    ------
    to_break : list
        constants.Item of each wood/agent that was hit (at most one per
        direction), in the order +x, -x, +y, -y
    """
    x, y = my_position
    # x indexes the first axis and y the second, so each coordinate is
    # bounded by the length of its own axis (this matters on non-square boards)
    n_x = len(board)
    n_y = len(board[0])

    to_break = list()
    for step_x, step_y in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
        for distance in range(1, blast_strength):
            position = (x + distance * step_x, y + distance * step_y)
            if not (0 <= position[0] < n_x and 0 <= position[1] < n_y):
                # ran off the board
                break
            if utility.position_is_rigid(board, position):
                # stop searching this direction
                break
            if (utility.position_is_wood(board, position)
                    or utility.position_is_agent(board, position)):
                # the first breakable thing absorbs the blast
                to_break.append(constants.Item(board[position]))
                break
    return to_break
def _is_closed(self, board, position):
    """
    Check whether the region around the position is sealed off from
    other agents.

    Starting from position, the search spreads through every on-board
    neighbor that is neither a wall nor fog. It reports False as soon as
    it touches a cell holding an agent, and True if the search runs out
    of cells without meeting one.

    Parameters
    ----------
    board : array
        np.array(obs['board'])
    position : tuple
        tuple(obs['position'])

    Return
    ------
    bool
        True if no agent cell was met during the search
    """
    visited = np.full(board.shape, False)
    visited[position] = True
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    stack = [position]
    while stack:
        row, col = stack.pop()
        for d_row, d_col in offsets:
            neighbor = (row + d_row, col + d_col)
            # the on-board test comes first so that visited is never
            # indexed with an off-board position
            if not self._on_board(neighbor) or visited[neighbor]:
                continue
            visited[neighbor] = True
            if utility.position_is_agent(board, neighbor):
                return False
            blocked = (utility.position_is_wall(board, neighbor)
                       or utility.position_is_fog(board, neighbor))
            if not blocked:
                stack.append(neighbor)
    return True
def _check_if_flame_will_gone(obs, prev_two_obs, flame_pos):
    """
    Check whether the whole flame group containing flame_pos was already
    burning in both previous observations.

    A flame cell that appears in the two previous observations may still
    be an old flame overlapped by a newer one, in which case it would not
    disappear on the next step. Therefore the entire connected group of
    flame cells around flame_pos in the current board is examined, and
    every cell of it must have been flames in both previous boards.

    Parameters
    ----------
    obs : dict
        current observation; obs['board'] is read
    prev_two_obs : sequence
        the two previous observations (both must be not None);
        their 'board' entries are read
    flame_pos : tuple
        position of the flame to examine

    Return
    ------
    bool
        True if every cell of the flame group was flames in both previous
        boards, False otherwise
    """
    assert (prev_two_obs[0] is not None)
    assert (prev_two_obs[1] is not None)

    prev_boards = (prev_two_obs[0]['board'], prev_two_obs[1]['board'])

    def _was_flames_twice(pos):
        # flames at pos in both of the previous boards
        return (utility.position_is_flames(prev_boards[0], pos)
                and utility.position_is_flames(prev_boards[1], pos))

    board = obs['board']
    dirs = _all_directions(exclude_stop=True)

    # breadth-first walk over the connected flame cells; the first popped
    # cell is flame_pos itself, so it is checked like any other cell
    queue = deque([flame_pos])
    visited = {flame_pos}
    while queue:
        pos = queue.popleft()
        if not _was_flames_twice(pos):
            return False
        for d in dirs:
            next_pos = utility.get_next_position(pos, d)
            if next_pos in visited:
                continue
            # only flame cells belong to the group being followed
            if utility.position_on_board(board, next_pos) \
                    and utility.position_is_flames(board, next_pos):
                queue.append(next_pos)
                visited.add(next_pos)
    return True
def position_is_bombable(board, position, bombs):
    """
    Tell whether any of the five checks below holds for the position:
    agent, powerup, passage, flame (all against board) or bomb (against
    bombs). Every check is evaluated before the results are combined.
    """
    has_agent = utility.position_is_agent(board, position)
    has_powerup = utility.position_is_powerup(board, position)
    is_passage = utility.position_is_passage(board, position)
    in_flame = position_is_flame(board, position)
    has_bomb = position_is_bomb(bombs, position)
    return any((has_agent, has_powerup, is_passage, in_flame, has_bomb))
def position_is_passable(board, position, enemies):
    """
    Determine whether a position can be passed.

    The position qualifies when at least one of the agent / powerup /
    passage / fog checks holds for it and it is not reported as an enemy
    position for the given enemies.
    """
    has_agent = utility.position_is_agent(board, position)
    has_powerup = utility.position_is_powerup(board, position)
    is_passage = utility.position_is_passage(board, position)
    is_fog = utility.position_is_fog(board, position)
    walkable = any((has_agent, has_powerup, is_passage, is_fog))

    is_enemy = utility.position_is_enemy(board, position, enemies)
    return all((walkable, not is_enemy))
def _stop_condition(board, pos, exclude_agent=True):
    """
    Tell whether a scan should stop at pos.

    The scan stops when pos is off the board, is fog, or is a wall.
    When exclude_agent is False, a cell holding an agent stops it too.
    """
    off_board = not utility.position_on_board(board, pos)
    if (off_board
            or utility.position_is_fog(board, pos)
            or utility.position_is_wall(board, pos)):
        return True
    if exclude_agent:
        # agents are not treated as obstacles
        return False
    return True if utility.position_is_agent(board, pos) else False
def act(self, obs, action_space, info):
    """
    Choose the action for this step.

    The method simulates future boards, finds the time-positions where
    I can survive, prunes actions that leave too few survivable
    positions, and then applies the following rules in order, returning
    as soon as one of them fires:

    1. the only survivable action, if exactly one is left
    2. place a bomb when staying put is the best spot for blocking enemies
    3. move to a neighboring position that blocks enemies better
    4. place a bomb at a wood-breaking target
    5. move toward a reachable good item (ExtraBomb, IncrRange, Kick)
    6. move toward a reachable bomb target
    7. kick a bomb when that reduces an enemy's survivable nodes
    8. move toward the fog that has been unseen longest
    9. the most survivable action

    Parameters
    ----------
    obs : dict
        observation; the keys read here are 'board', 'position', 'ammo',
        'blast_strength', 'enemies', 'teammate', 'can_kick',
        'bomb_blast_strength' and 'bomb_life'
    action_space :
        not used in this method
    info : dict
        extra state; the keys read here are "list_boards_no_move",
        "curr_bombs", "curr_flames", "enemy_blast_strength",
        "moving_direction", "might_powerup" and "since_last_seen"

    Return
    ------
    action : int or None
        the value of the chosen constants.Action, or None when no
        action is survivable
    """

    #
    # Definitions
    #

    enemy_mobility = 4
    enemy_bomb = 1

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]  # not used below in this method
    my_kick = obs["can_kick"]  # whether I can kick

    print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies
    # (work on a copy, since the boards are edited below)
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        # a bomb is at my position: show the bomb instead of me
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        # otherwise show a passage instead of me
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    #np.set_printoptions(precision=2)
    #print("frac")
    #print(total_frac_blocked)

    # where to place bombs to break wood
    # (n_breakable is not used below)
    bomb_target_wood, n_breakable \
        = self._get_bomb_target(info["list_boards_no_move"][-1],
                                my_position,
                                my_blast_strength,
                                constants.Item.Wood,
                                max_breakable=False)

    #bomb_target_enemy = (total_frac_blocked > 0)
    #bomb_target = bomb_target_enemy + bomb_target_wood
    bomb_target = bomb_target_wood

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=enemy_mobility,
        enemy_bomb=enemy_bomb,
        enemy_blast_strength=info["enemy_blast_strength"])

    # some bombs may explode with extra bombs, leading to under estimation
    # so every flame of the no-move boards is copied into the simulated boards
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    #print("boards")
    #for t, b in enumerate(list_boards):
    #    print(t)
    #    print(b[-3:,:])
    #    if t > 2:
    #        break

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards, my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])

    # n_survivable[action] : list of the number of survivable positions over time
    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable, _ = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog
            # (walk from the bomb along the kick direction until leaving the board)
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            # next_position is rebound to my position returned by the simulation
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])
            #print(list_boards_with_kick)
            # search from the board after the kick, hence the [1:]
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #print("survivable actions", survivable_actions)

    if len(survivable_actions) == 0:
        # no action is survivable
        return None

    #
    # Items and bomb target that can be reached in a survivable manner
    #

    # (reached is not used below)
    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable,
                                     bomb_target)

    #
    # Evaluate the survivability of each action
    #

    # NOTE(review): kick actions are also in survivable_actions, so the
    # n_survivable entry computed for them above is replaced in this loop
    # -- confirm that this is intended
    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    # verbose is not defined in this method; presumably a module-level flag
    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Avoid the action leading to no choice if possible
    #

    updated = False

    # drop the actions whose final count is at most half of the best final count
    max_survivable_positions = max([n[-1] for n in n_survivable.values()])
    if max_survivable_positions > 1:
        for a in n_survivable:
            if n_survivable[a][-1] > max_survivable_positions / 2:
                continue
            is_survivable[a] = False
            updated = True

    # drop the actions whose count falls to 1 at some time from
    # index enemy_mobility on, if another action avoids that
    minn = defaultdict(int)
    for a in n_survivable:
        minn[a] = min(n_survivable[a][enemy_mobility:])
    maxmin = max(minn.values())
    if maxmin > 1:
        for a in minn:
            if minn[a] == 1:
                is_survivable[a] = False
                updated = True

    if updated:
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # the string below is a disabled rule kept as a bare string literal;
    # it has no effect
    """
    #
    # Bomb if it has dominating survivability
    #

    if is_survivable[constants.Action.Bomb]:
        bomb_is_most_survivable = True
        bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
        for action in n_survivable:
            if action == constants.Action.Bomb:
                continue
            action_sorted = np.array(sorted(n_survivable[action]))
            if any(action_sorted > bomb_sorted):
                bomb_is_most_survivable = False
                break
        if bomb_is_most_survivable:
            action = constants.Action.Bomb
            print("Bomb to survive", action)
            return action.value
    """

    #
    # Bomb at a target
    #

    # the survivable action whose next position has the largest blocked fraction
    # (stays None when no fraction is positive)
    best_action = None
    max_block = 0
    for action in survivable_actions:
        next_position = self._get_next_position(my_position, action)
        block = total_frac_blocked[next_position]
        if block > max_block:
            max_block = block
            best_action = action

    if all([
            is_survivable[constants.Action.Bomb],
            best_action in [constants.Action.Stop, constants.Action.Bomb]
    ]):
        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = self._get_next_position(my_position, best_action)
        # TODO : PARAMETER TO OPTIMIZE
        if total_frac_blocked[next_position] > 0.1:
            print("Move towards better place to bomb", best_action)
            return best_action.value

    #
    # Bomb to break wood
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False
    elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                   my_position,
                                   my_blast_strength,
                                   info["might_powerup"]):
        # if might break an item, do not bomb
        consider_bomb = False

    if consider_bomb and bomb_target[my_position]:
        # place bomb if I am at a bomb target
        print("Bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards good items
    #

    good_items = [
        constants.Item.ExtraBomb, constants.Item.IncrRange,
        constants.Item.Kick
    ]
    good_time_positions = set()  # positions with good items
    for item in good_items:
        good_time_positions = good_time_positions.union(
            reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    #
    # Move towards where to bomb to break wood
    #

    good_time_positions = reachable_items["target"]
    print("good time positions", good_time_positions)
    action = self._find_distance_minimizer(my_position,
                                           good_time_positions,
                                           prev,
                                           is_survivable)
    if action is not None:
        print("Moving toward where to bomb", action)
        return action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        # position of the bomb that my_action would kick
        if my_action == constants.Action.Up:
            next_position = (my_position[0] - 1, my_position[1])
        elif my_action == constants.Action.Down:
            next_position = (my_position[0] + 1, my_position[1])
        elif my_action == constants.Action.Right:
            next_position = (my_position[0], my_position[1] + 1)
        elif my_action == constants.Action.Left:
            next_position = (my_position[0], my_position[1] - 1)

        # do not kick a bomb if it will break a wall, enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               [constants.Item.Wood] + my_enemies):
                continue

        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                # this enemy is not on the board
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                     enemy_position)

            # kick if the enemy is left with fewer survivable nodes than without the kick
            n_survivable_nodes_with_kick = sum(
                [len(positions) for positions in _survivable])
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->",
                      n_survivable_nodes_with_kick, my_action)
                return my_action.value

    #
    # TODO : move toward might powerups
    #

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy)
                     for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        # largest since_last_seen value among the on-board neighbors
        age = max([
            info["since_last_seen"][position] for position in neighbors
            if self._on_board(position)
        ])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    action = self._get_most_survivable_action(n_survivable)
    print("Most survivable action", action)
    return action.value
def _board_sequence(self, board, bombs, flames, length, my_position,
                    my_action=None, can_kick=False, enemy_mobility=3):
    """
    Simulate the sequence of boards, assuming agents stay unmoved

    Only my agent acts, and only at the first step (when my_action is
    given); every other action is Stop. For the first enemy_mobility
    steps, the positions that enemies might move to are marked with
    AgentDummy on the board.

    Parameters
    ----------
    board : array
        initial board
    bombs : list
        list of initial bombs
    flames : list
        list of initial flames
    length : int
        length of the board sequence to simulate
    my_position : tuple
        position of my agent
    my_action : Action, optional
        my action at the first step
    can_kick : boolean, optional
        whether I can kick
    enemy_mobility : int, optional
        number of steps where enemies move nondeterministically

    Return
    ------
    list_boards : list
        list of boards: a copy of the initial board followed by
        one board per simulated step (length + 1 boards)
    my_next_position : tuple or None
        position of the first simulated agent after the first step;
        None if no step is simulated or there is no agent
    """

    # Forward model to simulate
    model = ForwardModel()

    # Prepare initial state
    # (copies, so that the caller's board, bombs and flames are not changed)
    _board = board.copy()
    _bombs = deepcopy(bombs)
    _flames = deepcopy(flames)
    _items = dict()  # we never know hidden items
    _actions = [constants.Action.Stop.value] * 4
    if my_action is not None:
        agent = characters.Bomber()
        # presumably 10 is the board value of the first agent -- TODO confirm
        agent.agent_id = board[my_position] - 10
        agent.position = my_position
        agent.can_kick = can_kick
        _agents = [agent]
        _actions[agent.agent_id] = my_action
    else:
        _agents = list()

    my_next_position = None

    # Get enemy positions to take into account their mobility
    # (every cell with a value above AgentDummy, except my position)
    rows, cols = np.where(_board > constants.Item.AgentDummy.value)
    enemy_positions = [position for position in zip(rows, cols)
                       if position != my_position]

    # List of enemies
    enemies = list()
    for position in enemy_positions:
        agent = characters.Bomber()
        agent.agent_id = board[position] - 10
        agent.position = position
        enemies.append(agent)

    _agents = _agents + enemies

    # Overwrite bomb over agent if they overlap
    for bomb in _bombs:
        _board[bomb.position] = constants.Item.Bomb.value

    # Simulate
    list_boards = [_board.copy()]
    for t in range(length):
        # Standard simulation step
        _board, _agents, _bombs, _, _flames \
            = model.step(_actions, _board, _agents, _bombs, _items, _flames)

        # Overwrite passage over my agent when it has moved to a passage
        # NOTE(review): when my_action is None, _agents[0] is the first
        # enemy (if any) rather than my agent -- confirm this is intended
        if t == 0 and len(_agents) > 0:
            agent = _agents[0]
            my_next_position = agent.position
            if all([agent.position != my_position,
                    _board[agent.position] != constants.Item.Flames.value,
                    _board[agent.position] != constants.Item.Bomb.value]):
                # I did not die and did not stay on a bomb
                _board[agent.position] = constants.Item.Passage.value

        # Overwrite bomb over agent if they overlap
        for bomb in _bombs:
            _board[bomb.position] = constants.Item.Bomb.value

        # Take into account the nondeterministic mobility of enemies
        if t < enemy_mobility:
            _enemy_positions = set()
            for x, y in enemy_positions:
                # for each enemy position in the previous step
                for dx, dy in [(0, 0), (1, 0), (-1, 0), (0, 1), (0, -1)]:
                    # consider the next possible position
                    next_position = (x + dx, y + dy)
                    if not self._on_board(next_position):
                        # ignore if out of board
                        continue
                    if any([utility.position_is_passage(_board, next_position),
                            utility.position_is_powerup(_board, next_position),
                            (next_position == my_position
                             and utility.position_is_agent(_board, next_position)
                            )]):
                        # possible as a next position
                        # TODO : what to do with my position
                        _enemy_positions.add(next_position)
                        _board[next_position] = constants.Item.AgentDummy.value
            # the frontier keeps only the newly marked positions
            enemy_positions = _enemy_positions

        # after the first step, nobody acts and only the enemies are simulated
        _actions = [constants.Action.Stop.value] * 4
        _agents = enemies
        list_boards.append(_board.copy())

    return list_boards, my_next_position
def _find_reachable_items(self, list_boards, my_position, time_positions):
    """
    Collect the items on, and next to, the survivable time-positions

    Parameters
    ----------
    list_boards : list
        list of boards, generated by _board_sequence
    my_position : tuple
        my position (not used by this method)
    time_positions : list
        survivable time-positions, generated by _search_time_expanded_network
        time_positions[t] : positions survivable at time t

    Return
    ------
    items : dict
        items[item] : list of time-positions (t, x, y) where the item was found,
        either on a survivable position or on a neighbor of one
    reached : array
        minimum time to reach each position on the board (inf if never reached)
    next_to_items : dict
        next_to_items[item] : list of time-positions (t, x, y) of survivable
        positions that have the item as a neighbor
    """
    found = defaultdict(list)
    adjacent_to = defaultdict(list)
    # earliest[position] : first time the position shows up in time_positions
    earliest = np.full(self.board_shape, np.inf)
    # seen[position] : the position was already handled as a cell or a neighbor
    seen = np.full(self.board_shape, False)

    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    for step, cells in enumerate(time_positions):
        current = list_boards[step]
        final = list_boards[-1]
        for cell in cells:
            if earliest[cell] < np.inf:
                # already recorded at an earlier time
                continue
            earliest[cell] = step
            found[constants.Item(current[cell])].append((step,) + cell)
            seen[cell] = True

            row, col = cell
            for d_row, d_col in offsets:
                neighbor = (row + d_row, col + d_col)
                if not self._on_board(neighbor) or seen[neighbor]:
                    continue
                seen[neighbor] = True

                # a neighbor is recorded once if it holds an agent now, and
                # once more if it is wood on the last board (wood that is
                # gone by then is ignored)
                n_records = 0
                if utility.position_is_agent(current, neighbor):
                    n_records += 1
                if utility.position_is_wood(final, neighbor):
                    n_records += 1
                if not n_records:
                    continue
                kind = constants.Item(current[neighbor])
                for _ in range(n_records):
                    found[kind].append((step,) + neighbor)
                    adjacent_to[kind].append((step,) + cell)

    return found, earliest, adjacent_to
def _search_time_expanded_network(self, list_boards, my_position):
    """
    Find survivable time-positions in the list of boards from my position

    A forward pass marks the time-positions reachable from my position,
    moving by at most one cell per step and avoiding blocked cells. A
    backward pass then keeps only those time-positions from which the
    last board can still be reached.

    Parameters
    ----------
    list_boards : list
        list of boards, generated by _board_sequence
    my_position : tuple
        my position, where the search starts

    Return
    ------
    survivable : list
        list of the set of survivable time-positions at each time
        survivable[t] : set of survivable positions at time t
    prev : list
        list of len(list_boards) + 1 dicts
        prev[t] : dict
        prev[t][position] : list of positions from which
                            one can reach the position at time t

    If my position is already in flames on the first board, every set
    in survivable and every dict in prev is empty.
    """
    depth = len(list_boards)
    moves = [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]

    # TODO : what to do with Fog?
    exclude = [constants.Item.Fog,
               constants.Item.Rigid,
               constants.Item.Wood,
               constants.Item.Bomb,
               constants.Item.Flames,
               constants.Item.AgentDummy]

    if list_boards[0][my_position] == constants.Item.Flames.value:
        # nothing is survivable; return fresh containers of the same shape
        # as the normal result, so that callers can index and mutate them
        # without sharing one object across time steps
        return ([set() for _ in range(depth)],
                [defaultdict(list) for _ in range(depth + 1)])

    # Forward search for reachable positions
    # reachable[(t,x,y)]: whether can reach (x,y) at time t
    reachable = np.full((depth,) + self.board_shape, False)
    reachable[(0,)+my_position] = True
    next_positions = {my_position}
    my_position_get_flame = False
    for t in range(1, depth):
        if list_boards[t][my_position] == constants.Item.Flames.value:
            my_position_get_flame = True
        curr_positions = next_positions
        next_positions = set()

        # add all possible positions (staying is the (0, 0) move)
        for x, y in curr_positions:
            for row, col in moves:
                next_positions.add((x + row, y + col))

        for position in next_positions.copy():
            if not self._on_board(position):
                # remove out of positions
                next_positions.remove(position)
            elif list_boards[t][position] == constants.Item.AgentDummy.value:
                # TODO: this may be too conservative
                # avoid contact to other agents
                next_positions.remove(position)
            elif position == my_position and not my_position_get_flame:
                # can stay even on bomb until getting flame
                continue
            elif utility.position_in_items(list_boards[t], position, exclude):
                # remove blocked
                next_positions.remove(position)
            elif utility.position_is_agent(list_boards[t], position):
                # if occupied by another agent
                next_positions.remove(position)

        for position in next_positions:
            reachable[(t,)+position] = True

    # Backward search for survivable positions
    # survivable[t]: set of survivable positions at time t
    # prev[t][position]: list of positions from which
    #                    one can reach the position at time t
    survivable = [set() for _ in range(depth)]
    survivable[-1] = next_positions
    prev = [defaultdict(list) for _ in range(depth+1)]
    for t in range(depth-1, 0, -1):
        for position in survivable[t]:
            # for each position surviving at time t
            # if the position is on a bomb, I must have stayed there since I placed the bomb
            if list_boards[t][position] == constants.Item.Bomb.value:
                if reachable[(t-1,)+position]:
                    prev[t][position].append(position)
                    continue

            # otherwise, standard case
            x, y = position
            for row, col in moves:
                # consider the prev_position at time t - 1
                prev_position = (x + row, y + col)
                if not self._on_board(prev_position):
                    # discard the prev_position if out of board
                    continue
                if reachable[(t-1,)+prev_position]:
                    # can reach the position at time t
                    # from the prev_position at time t-1
                    prev[t][position].append(prev_position)

        # the set of prev_positions at time t-1
        # from which one can reach the surviving positions at time t
        survivable[t-1] = {position
                           for prevs in prev[t].values()
                           for position in prevs}

    return survivable, prev
def act(self, obs, action_space, info):
    """
    Choose the action after which I can survive for the longest time.

    Two survivable trees are built: one for moving or staying, from the
    boards in info["list_boards_no_move"], and, if I can lay a bomb, one
    for laying a bomb at my position. Each candidate action is scored by
    the latest time step that can still be reached after taking it, and
    one of the best-scoring actions is returned at random.

    Every path through this method ends at the final if/else, where both
    branches return.

    Parameters
    ----------
    obs : dict
        observation; the keys read here are 'board', 'position',
        'enemies', 'blast_strength', 'ammo', 'bomb_blast_strength'
        and 'bomb_life'
    action_space :
        not used in this method
    info : dict
        extra state; the keys read here are "list_boards_no_move",
        "curr_bombs" and "curr_flames"

    Return
    ------
    action : int
        the value of the chosen constants.Action;
        the value of constants.Action.Stop if no action was scored
    """

    def _life_table(survivable_with_enemy, prev):
        # life[(t, x, y)] : latest time that can be reached from position
        # (x, y) at time t; filled backwards, from the last step to the first
        life = defaultdict(int)
        for t in range(self._search_range, 0, -1):
            for position in survivable_with_enemy[t]:
                if not life[(t, ) + position]:
                    life[(t, ) + position] = t
                for prev_position in prev[t][position]:
                    life[(t - 1, ) + prev_position] = max([
                        life[(t - 1, ) + prev_position],
                        life[(t, ) + position]
                    ])
        return life

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_blast_strength = obs['blast_strength']

    # position of each enemy that is on the board
    enemy_position = dict()
    for enemy in my_enemies:
        positions = np.argwhere(board == enemy.value)
        if len(positions) == 0:
            continue
        enemy_position[enemy] = tuple(positions[0])

    # survivable_steps[action] : score of the action
    survivable_steps = defaultdict(int)

    #
    # survivable tree in standard case
    #

    list_boards_no_kick = deepcopy(info["list_boards_no_move"])

    # remove myself: show the bomb under me if there is one, a passage otherwise
    if obs["bomb_blast_strength"][my_position]:
        replacement = constants.Item.Bomb.value
    else:
        replacement = constants.Item.Passage.value
    for b in list_boards_no_kick:
        if utility.position_is_agent(b, my_position):
            b[my_position] = replacement

    my_survivable, my_prev, _, my_survivable_with_enemy \
        = self._get_survivable_with_enemy(list_boards_no_kick, my_position, enemy_position)

    life = _life_table(my_survivable_with_enemy, my_prev)
    for next_position in my_survivable[1]:
        my_action = self._get_direction(my_position, next_position)
        survivable_steps[my_action] = life[(1, ) + next_position]

    #
    # survivable tree if I lay bomb
    #

    if all([obs["ammo"] > 0, obs["bomb_life"][my_position] == 0]):
        # if I can lay a bomb

        board_with_bomb = deepcopy(obs["board"])
        curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
        # lay a bomb
        board_with_bomb[my_position] = constants.Item.Bomb.value
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            my_position,
            constants.DEFAULT_BOMB_LIFE,
            my_blast_strength,
            None)
        curr_bombs_with_bomb.append(bomb)
        list_boards_with_bomb, _ \
            = self._board_sequence(board_with_bomb,
                                   curr_bombs_with_bomb,
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        _, my_prev_with_bomb, _, my_survivable_with_bomb_enemy \
            = self._get_survivable_with_enemy(list_boards_with_bomb, my_position, enemy_position)

        life = _life_table(my_survivable_with_bomb_enemy, my_prev_with_bomb)
        survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

    print("survivable steps")
    print(survivable_steps)

    if survivable_steps:
        # pick uniformly among the actions with the best score
        values = np.array(list(survivable_steps.values()))
        print(values)
        best_index = np.where(values == np.max(values))
        best_actions = np.array(list(survivable_steps.keys()))[best_index]
        best_action = random.choice(best_actions)
        print("Most survivable action", best_action)
        return best_action.value
    else:
        print("No actions: stop")
        return constants.Action.Stop.value
def act(self, obs, action_space, info):
    """Choose one action for the current step and return its integer value.

    The decision proceeds in stages; the first stage that yields a choice
    returns immediately:

    1. Score every action by how much it blocks the enemies' survivable
       nodes, discounted by how much it blocks the teammate (``block``).
    2. Estimate survivability of each action when enemies move
       (``enemy_mobility=1``) and, if any enemy can place a bomb, when
       enemies place bombs (``enemy_bomb=1``).
    3. If actions survive every scenario, take the most survivable one and
       break ties by block score.
    4. Otherwise repeat with the actions that survive at least one scenario.
    5. Otherwise take the best positive block score, then the longest
       survivable action, then teammate-friendliness / a random pick.

    Parameters
    ----------
    obs : dict
        Observation; this method reads ``position``, ``ammo``,
        ``blast_strength``, ``enemies``, ``teammate``,
        ``bomb_blast_strength`` and ``bomb_life``.
    action_space :
        Unused here.
    info : dict
        Precomputed data; this method reads ``recently_seen``,
        ``my_next_position``, ``teammate_position``, ``enemy_positions``,
        ``list_boards_no_move``, ``all_kickable``, ``curr_bombs``,
        ``curr_flames``, ``step_to_collapse``, ``collapse_ring``,
        ``might_block_actions``, ``might_blocked`` and
        ``might_block_teammate``.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``.
    """

    #
    # Definitions
    #

    board = info['recently_seen']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None

    all_feasible_actions = [
        a for a in info["my_next_position"] if info["my_next_position"][a]
    ]

    #
    # Fraction of blocked node in the survival trees of enemies
    #

    # NOTE: the boards in info["list_boards_no_move"] are modified in place
    # (no copy is taken): my own cell is replaced by a bomb or a passage.
    _list_boards = info["list_boards_no_move"]
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, _ \
        = self._get_frac_blocked(_list_boards, my_enemies, board,
                                 obs["bomb_life"],
                                 ignore_dying_agent=False)

    if info["teammate_position"] is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, _ \
            = self._get_frac_blocked(_list_boards, [my_teammate], board,
                                     obs["bomb_life"],
                                     ignore_dying_agent=True)

    # Block score of moving (or stopping); kicks are scored separately below.
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = info["my_next_position"][action]
        if next_position is None:
            continue
        if next_position in info["all_kickable"]:
            # kick will be considered later
            continue
        block[action] = total_frac_blocked[next_position]
        if info["teammate_position"] is not None and block[action] > 0:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        if block[action] > 0:
            # Scale and perturb only positive scores, so that a score of
            # exactly 0 stays 0 ("do not choose 0" below).
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    # Sentinel meaning "placing a bomb was not evaluated against the
    # teammate".  It is overwritten below when a bomb can be placed and a
    # teammate exists.  Without this binding, the later reads of this name
    # raise UnboundLocalError when the agent cannot place a bomb but Bomb is
    # still among the candidate actions.  With +inf, Bomb is skipped by the
    # "> 0" check and never wins the "smallest teammate block" tie-break.
    block_teammate_with_bomb = np.inf

    if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        block[constants.Action.Bomb] \
            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies,
                                               ignore_dying_agent=False)
        block[constants.Action.Bomb] \
            += total_frac_blocked[my_position] * (1 - block[constants.Action.Bomb])

        if info["teammate_position"] is not None:
            block_teammate_with_bomb = self._get_frac_blocked_two_lists(
                list_boards_with_bomb,
                n_survivable_nodes_teammate,
                board, [my_teammate],
                ignore_dying_agent=True)
            # this is an approximation
            block_teammate_with_bomb \
                += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
            block[constants.Action.Bomb] *= (1 - block_teammate_with_bomb)

        if block[constants.Action.Bomb] > 0:
            block[constants.Action.Bomb] *= self._inv_tmp
            block[constants.Action.Bomb] -= np.log(
                -np.log(self.random.uniform()))

    # Block score of kicking a bomb.
    block_teammate_with_kick = defaultdict(float)
    for next_position in info["all_kickable"]:

        my_action = self._get_direction(my_position, next_position)

        # Temporarily put a bomb on the kicked cell for the simulation and
        # restore the shared board afterwards, even if the simulation raises.
        backedup = False
        if board[next_position] != constants.Item.Bomb.value:
            backup_cell = board[next_position]
            board[next_position] = constants.Item.Bomb.value  # an agent will be overwritten
            backedup = True

        try:
            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])
        finally:
            if backedup:
                board[next_position] = backup_cell

        block[my_action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        block[my_action] \
            += total_frac_blocked[next_position] * (1 - block[my_action])

        if block[my_action] > 0 and info["teammate_position"] is not None:
            block_teammate_with_kick[next_position] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes_teammate,
                                                   board, [my_teammate],
                                                   ignore_dying_agent=True)
            # this is an approximation
            block_teammate_with_kick[next_position] \
                += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick[next_position])
            block[my_action] *= (1 - block_teammate_with_kick[next_position])

        if block[my_action] > 0:
            block[my_action] *= self._inv_tmp
            block[my_action] -= np.log(-np.log(self.random.uniform()))

    #
    # Survivability, when enemies may move
    #

    n_survivable_move, is_survivable_move, _ \
        = self._get_survivable(obs, info, my_position,
                               info["my_next_position"],
                               info["enemy_positions"],
                               info["all_kickable"],
                               allow_kick_to_fog=True,
                               enemy_mobility=1, enemy_bomb=0,
                               ignore_dying_agent=False,
                               step_to_collapse=info["step_to_collapse"],
                               collapse_ring=info["collapse_ring"])

    for a in all_feasible_actions:
        if a not in n_survivable_move:
            n_survivable_move[a] = np.zeros(self._search_range)

    enemy_can_place_bomb = any([
        obs["bomb_blast_strength"][position] == 0
        for position in info["enemy_positions"]
    ])

    if enemy_can_place_bomb:

        #
        # Survivability, when enemies may place bombs
        #

        n_survivable_bomb, _, _ \
            = self._get_survivable(obs, info, my_position,
                                   info["my_next_position"],
                                   info["enemy_positions"],
                                   info["all_kickable"],
                                   allow_kick_to_fog=True,
                                   enemy_mobility=0, enemy_bomb=1,
                                   ignore_dying_agent=False,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in all_feasible_actions:
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)

        might_survivable_actions = set(
            [a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0] +
            [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

        might_survivable_actions -= info["might_block_actions"]
        for a in info["might_block_actions"]:
            n_survivable_bomb[a] = np.zeros(self._search_range)
            n_survivable_move[a] = np.zeros(self._search_range)

        for a in might_survivable_actions:
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        # Survivable in both scenarios; an action that might be blocked
        # additionally requires Stop to be survivable.
        survivable_actions = list()
        for action in might_survivable_actions:
            if n_survivable_move[action][-1] > 0 and n_survivable_bomb[
                    action][-1] > 0:
                if not info["might_blocked"][action] or n_survivable_move[
                        constants.Action.Stop][-1] > 0:
                    survivable_actions.append(action)

        n_survivable_expected = dict()
        for a in survivable_actions:
            if info["might_blocked"][a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_bomb[a]) \
                    + np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + 2 * np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3

    else:

        might_survivable_actions = set(
            [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

        might_survivable_actions -= info["might_block_actions"]
        for a in info["might_block_actions"]:
            n_survivable_move[a] = np.zeros(self._search_range)

        survivable_actions = list()
        for action in might_survivable_actions:
            if n_survivable_move[action][-1] > 0:
                if not info["might_blocked"][action] or n_survivable_move[
                        constants.Action.Stop][-1] > 0:
                    survivable_actions.append(action)

        for a in might_survivable_actions:
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        n_survivable_expected = dict()
        for a in survivable_actions:
            if info["might_blocked"][a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(n_survivable_move[a])

    #
    # Choose actions
    #

    if len(survivable_actions) == 1:

        action = survivable_actions.pop()
        return action.value

    if len(survivable_actions) > 1:

        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:

            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            # tie break by block score
            max_block = 0  # do not choose 0
            best_action = None
            for action in all_feasible_actions:
                if action not in most_survivable_actions:
                    # for deterministic behavior
                    continue
                if info["might_block_teammate"][action]:
                    continue
                if block[action] > max_block:
                    max_block = block[action]
                    best_action = action
            if best_action is not None:
                return best_action.value

    #
    # no survivable actions for all cases
    #

    if enemy_can_place_bomb:

        n_survivable_expected = dict()
        for a in all_feasible_actions:
            if info["might_blocked"][a]:
                if is_survivable_move[constants.Action.Stop]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                else:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + 2 * np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3

    else:

        n_survivable_expected = dict()
        for a in all_feasible_actions:
            if info["my_next_position"][a] is None:
                continue
            if info["might_blocked"][a]:
                if is_survivable_move[constants.Action.Stop]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(n_survivable_move[a])

    if len(might_survivable_actions) == 1:

        action = might_survivable_actions.pop()
        return action.value

    if len(might_survivable_actions) > 1:

        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:

            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            # tie break by block score
            max_block = 0  # do not choose 0
            best_action = None
            for action in all_feasible_actions:
                if action not in most_survivable_actions:
                    # for deterministic behavior
                    continue
                if info["might_block_teammate"][action]:
                    continue
                if block[action] > max_block:
                    max_block = block[action]
                    best_action = action
            if best_action is not None:
                return best_action.value

    # no survivable action found for any cases
    # TODO : Then consider killing enemies or helping teammate

    max_block = 0  # do not choose 0
    best_action = None
    for action in all_feasible_actions:
        if action not in block:
            # for deterministic behavior
            continue
        if info["might_block_teammate"][action]:
            continue
        if all([
                action == constants.Action.Bomb,
                info["teammate_position"] is not None
        ]):
            if block_teammate_with_bomb > 0:
                continue
        next_position = info["my_next_position"][action]
        if all([
                next_position in info["all_kickable"],
                block_teammate_with_kick[next_position] > 0
        ]):
            continue
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if best_action is not None:
        return best_action.value

    # longest survivable action

    longest_survivable_actions = self._get_longest_survivable_actions(
        n_survivable_expected)

    if len(longest_survivable_actions) == 1:

        return longest_survivable_actions[0].value

    elif len(longest_survivable_actions) > 1:

        # break tie by most survivable actions
        for a in n_survivable_expected:
            if a not in longest_survivable_actions:
                n_survivable_expected[a] = np.zeros(self._search_range)
        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:

            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            if info["teammate_position"] is not None:
                # Prefer the action that blocks the teammate the least.
                min_block = np.inf
                best_action = None
                for a in all_feasible_actions:
                    if a not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if a == constants.Action.Bomb:
                        score = block_teammate_with_bomb  # do not choose Bomb unless it is strictly better than others
                    else:
                        next_position = info["my_next_position"][a]
                        if next_position in info["all_kickable"]:
                            score = block_teammate_with_kick[
                                next_position] - self.random.uniform(
                                    0, 1e-6)
                        else:
                            score = total_frac_blocked_teammate[
                                next_position] - self.random.uniform(
                                    0, 1e-6)
                    if score < min_block:
                        min_block = score
                        best_action = a
                if best_action is not None:
                    return best_action.value
            else:
                # remove Bomb (as it is most affected by bugs)
                most_survivable_actions = [
                    a for a in all_feasible_actions
                    if a in most_survivable_actions
                    and a != constants.Action.Bomb
                ]

                index = self.random.randint(len(most_survivable_actions))
                random_action = most_survivable_actions[index]
                return random_action.value

    # The following will not be used

    self.random.shuffle(all_feasible_actions)
    if len(all_feasible_actions):
        action = all_feasible_actions[0]
        return action.value

    action = constants.Action.Stop
    return action.value
def act(self, obs, action_space, info):
    """Choose one action for the current step and return its integer value.

    The decision proceeds in stages; the first stage that yields a choice
    returns immediately:

    1. Estimate survivability of each action under three scenarios: every
       visible enemy is replaced by a bomb, enemies may move one step, and
       no other agents exist.
    2. If some actions survive all three scenarios, return the only one or
       the most survivable one.
    3. Otherwise do the same over every action that appears in any scenario.
    4. Otherwise return the action with the highest block score (fraction of
       enemy survivable nodes blocked, discounted by the fraction of the
       teammate's nodes blocked), then fall back to random choices.

    Parameters
    ----------
    obs : dict
        Observation; this method reads ``position``, ``ammo``,
        ``blast_strength``, ``enemies``, ``teammate``,
        ``bomb_blast_strength``, ``bomb_life`` and ``board``.
    action_space :
        Unused here.
    info : dict
        Precomputed data; this method reads ``last_seen``,
        ``moving_direction``, ``curr_bombs``, ``curr_flames`` and
        ``list_boards_no_move``.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``.
    """

    #
    # Definitions
    #

    board = info['last_seen']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    kickable, might_kickable \
        = self._kickable_positions(obs, info["moving_direction"],
                                   consider_agents=True)

    # enemy positions (first matching cell of each enemy on the board)
    enemy_positions = list()
    for enemy in my_enemies:
        rows, cols = np.where(board == enemy.value)
        if len(rows) == 0:
            continue
        enemy_positions.append((rows[0], cols[0]))

    # teammate position
    teammate_position = None
    if my_teammate is not None:
        rows, cols = np.where(board == my_teammate.value)
        if len(rows):
            teammate_position = (rows[0], cols[0])

    # positions that might be blocked
    if teammate_position is None:
        agent_positions = enemy_positions
    else:
        agent_positions = enemy_positions + [teammate_position]
    might_blocked = self._get_might_blocked(board, my_position,
                                            agent_positions, might_kickable)

    #
    # Survivability, when enemy is replaced by a bomb, and no move afterwards
    #

    # replace enemy with bomb
    _bombs = deepcopy(info["curr_bombs"])
    rows, cols = np.where(board > constants.Item.AgentDummy.value)
    for position in zip(rows, cols):
        # NOTE(review): board cells look like integers while my_enemies
        # holds constants.Item members; confirm this membership test
        # matches enemies as intended.
        if board[position] not in my_enemies:
            continue
        if obs["bomb_blast_strength"][position]:
            # already a bomb
            continue
        # NOTE(review): enemy_blast_strength_map is not defined in this
        # method; presumably a module-level name -- confirm.
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            position,
            constants.DEFAULT_BOMB_LIFE,
            enemy_blast_strength_map[position],
            None)
        _bombs.append(bomb)

    n_survivable_bomb = self._get_n_survivable(board,
                                               _bombs,
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=0)

    #
    # Survivability, when enemy moves one position or stay unmoved
    #

    n_survivable_move = self._get_n_survivable(board,
                                               info["curr_bombs"],
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=1)

    #
    # Survivability, when no enemies
    #

    # Work on a copy of the board in which every agent cell except my own
    # is turned into a passage.
    _board = deepcopy(board)
    agent_positions = np.where(_board > constants.Item.AgentDummy.value)
    _board[agent_positions] = constants.Item.Passage.value
    _board[my_position] = board[my_position]

    _obs = {
        "position": obs["position"],
        "blast_strength": obs["blast_strength"],
        "ammo": obs["ammo"],
        "bomb_life": obs["bomb_life"],
        "board": _board
    }

    n_survivable_none = self._get_n_survivable(_board,
                                               info["curr_bombs"],
                                               info["curr_flames"],
                                               _obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=0)

    #
    # Survivable actions
    #

    survivable_actions_bomb = set(
        [a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0])
    survivable_actions_move = set(
        [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])
    survivable_actions_none = set(
        [a for a in n_survivable_none if n_survivable_none[a][-1] > 0])

    survivable_actions = set.intersection(survivable_actions_bomb,
                                          survivable_actions_move,
                                          survivable_actions_none)

    # if can survive without possibility of being blocked, then do so.
    # When Stop is not survivable, a blocked move leaves me in place, so
    # prefer the survivable actions that cannot be blocked -- but only if at
    # least one such action exists.
    if constants.Action.Stop not in survivable_actions:
        unblocked_actions = {
            action for action in survivable_actions
            if not might_blocked[action]
        }
        if unblocked_actions:
            survivable_actions = unblocked_actions

    #
    # Choose actions
    #

    if len(survivable_actions) == 1:

        action = survivable_actions.pop()
        if verbose:
            print("Only survivable action", action)
        return action.value

    if len(survivable_actions) > 1:

        n_survivable_expected = dict()
        for a in survivable_actions:
            if might_blocked[a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_bomb[a]) \
                    + np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_none[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            elif a in [constants.Action.Stop, constants.Action.Bomb]:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + np.array(n_survivable_none[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
        action = self._get_most_survivable_action(n_survivable_expected)
        if verbose:
            print("Most survivable action", action)
        return action.value

    # no survivable actions for all cases
    survivable_actions = set(
        list(n_survivable_bomb.keys()) + list(n_survivable_move.keys()) +
        list(n_survivable_none.keys()))

    if len(survivable_actions) == 1:

        action = survivable_actions.pop()
        if verbose:
            print("Only might survivable action", action)
        return action.value

    if len(survivable_actions) > 1:

        # Fill the scenarios in which an action was not evaluated with zeros
        # so that the sums below are well defined.
        for a in set.union(survivable_actions, {constants.Action.Stop}):
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)
            if a not in n_survivable_none:
                n_survivable_none[a] = np.zeros(self._search_range)

        n_survivable_expected = dict()
        for a in survivable_actions:
            if might_blocked[a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_bomb[a]) \
                    + np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_none[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            elif a in [constants.Action.Stop, constants.Action.Bomb]:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + np.array(n_survivable_none[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
        action = self._get_most_survivable_action(n_survivable_expected)
        if verbose:
            print("Most might survivable action", action)
        return action.value

    # no survivable action found for any cases
    # TODO : Then consider killing enemies or helping teammate

    # fraction of blocked node in the survival trees of enemies
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, _ \
        = self._get_frac_blocked(_list_boards, my_enemies, board,
                                 obs["bomb_life"])
    if teammate_position is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, _ \
            = self._get_frac_blocked(_list_boards, [my_teammate], board,
                                     obs["bomb_life"])

    # Block score of moving (or stopping); kicks are scored separately below.
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = self._get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if board[next_position] in [
                constants.Item.Rigid.value, constants.Item.Wood.value
        ]:
            continue
        if next_position in set.union(kickable, might_kickable):
            # kick will be considered later
            continue
        block[action] = total_frac_blocked[next_position]
        if teammate_position is not None:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        block[action] *= self._inv_tmp
        block[action] -= np.log(-np.log(self.random.uniform()))

    # A bomb can be placed only with ammo left AND no bomb already on my
    # cell, so both conditions are required.
    if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb)
        block[constants.Action.Bomb] \
            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        if teammate_position is not None:
            # The teammate's blocked fraction is measured against the
            # teammate's own baseline, not the enemies'.
            block_teammate = self._get_frac_blocked_two_lists(
                list_boards_with_bomb, n_survivable_nodes_teammate, board,
                [my_teammate])
            block[constants.Action.Bomb] *= (1 - block_teammate)
        block[constants.Action.Bomb] *= self._inv_tmp
        block[constants.Action.Bomb] -= np.log(
            -np.log(self.random.uniform()))

    # Block score of kicking a bomb.
    for next_position in set.union(kickable, might_kickable):
        my_action = self._get_direction(my_position, next_position)
        list_boards_with_kick, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True)
        block[my_action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        if teammate_position is not None:
            block_teammate = self._get_frac_blocked_two_lists(
                list_boards_with_kick, n_survivable_nodes_teammate, board,
                [my_teammate])
            block[my_action] *= (1 - block_teammate)
        block[my_action] *= self._inv_tmp
        block[my_action] -= np.log(-np.log(self.random.uniform()))

    max_block = -np.inf
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if best_action is not None:
        if verbose:
            print(
                "Best action to kill enemies or help teammate (cannot survive)"
            )
        return best_action.value

    # The following will not be used

    # NOTE(review): this compares my own blast strength with 0; the intended
    # check may be "no bomb on my cell" -- confirm before relying on it.
    if obs["ammo"] > 0 and obs["blast_strength"] == 0:
        action = constants.Action.Bomb
        if verbose:
            print("Suicide", action)
        return action.value

    kickable_positions = list(set.union(kickable, might_kickable))
    if kickable_positions:
        self.random.shuffle(kickable_positions)
        action = self._get_direction(my_position, kickable_positions[0])
        if verbose:
            print("Suicide kick", action)
        return action.value

    all_actions = [
        constants.Action.Stop, constants.Action.Up, constants.Action.Down,
        constants.Action.Right, constants.Action.Left
    ]
    self.random.shuffle(all_actions)
    for action in all_actions:
        next_position = self._get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_wall(board, next_position):
            continue
        if verbose:
            print("Random action", action)
        return action.value

    action = constants.Action.Stop
    if verbose:
        print("No action found", action)
    return action.value
def act(self, obs, action_space, info): # # Definitions # board = obs['board'] recently_seen_positions = (info["since_last_seen"] < 3) board[recently_seen_positions] = info["last_seen"][recently_seen_positions] my_position = obs["position"] # tuple([x,y]): my position my_ammo = obs['ammo'] # int: the number of bombs I have my_blast_strength = obs['blast_strength'] my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy] if obs["teammate"] != constants.Item.AgentDummy: my_teammate = obs["teammate"] else: my_teammate = None my_kick = obs["can_kick"] # whether I can kick if verbose: print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t") my_next_position = {constants.Action.Stop: my_position, constants.Action.Bomb: my_position} for action in [constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = self._get_next_position(my_position, action) if self._on_board(next_position): if board[next_position] == constants.Item.Rigid.value: my_next_position[action] = None else: my_next_position[action] = next_position else: my_next_position[action] = None # # Understand current situation # if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb], info["prev_position"] == my_position]): # if previously blocked, do not reapeat with some probability self._inv_tmp *= self._backoff else: self._inv_tmp = self._inv_tmp_init # enemy positions enemy_positions = list() for enemy in my_enemies: rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_positions.append((rows[0], cols[0])) # teammate position teammate_position = None if my_teammate is not None: rows, cols = np.where(board==my_teammate.value) if len(rows): teammate_position = (rows[0], cols[0]) # Positions where we kick a bomb if we move to if my_kick: kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"]) else: kickable = 
set() might_kickable = set() # positions that might be blocked if teammate_position is None: agent_positions = enemy_positions else: agent_positions = enemy_positions + [teammate_position] might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable) # enemy positions over time # these might be dissappeared due to extra flames if len(enemy_positions): rows = [p[0] for p in enemy_positions] cols = [p[1] for p in enemy_positions] list_enemy_positions = [(rows, cols)] _enemy_positions = list() for t in range(self._enemy_mobility): rows, cols = list_enemy_positions[-1] for x, y in zip(rows, cols): for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]: next_position = (x + dx, y + dy) if not self._on_board(next_position): continue _board = info["list_boards_no_move"][t] if utility.position_is_passage(_board, next_position): _enemy_positions.append(next_position) _enemy_positions = set(_enemy_positions) rows = [p[0] for p in _enemy_positions] cols = [p[1] for p in _enemy_positions] list_enemy_positions.append((rows, cols)) else: list_enemy_positions = [] # survivable actions is_survivable = dict() for a in self._get_all_actions(): is_survivable[a] = False n_survivable = dict() list_boards = dict() for my_action in self._get_all_actions(): next_position = my_next_position[my_action] if next_position is None: continue if my_action == constants.Action.Bomb: if any([my_ammo == 0, obs["bomb_blast_strength"][next_position] > 0]): continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1]): continue if all([my_action != constants.Action.Stop, obs["bomb_blast_strength"][next_position] > 0, next_position not in set.union(kickable, might_kickable)]): continue if next_position in set.union(kickable, might_kickable): # do not kick into fog dx = next_position[0] - my_position[0] dy = next_position[1] - my_position[1] position = next_position is_fog = False while self._on_board(position): if 
utility.position_is_fog(board, position): is_fog = True break position = (position[0] + dx, position[1] + dy) if is_fog: continue # list of boards from next steps list_boards[my_action], _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=my_action, can_kick=my_kick, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # agents might be disappeared, because of overestimated bombs for t, positions in enumerate(list_enemy_positions): list_boards[my_action][t][positions] = constants.Item.AgentDummy.value # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards[my_action])): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[my_action][t][flame_positions] = constants.Item.Flames.value """ processed = Parallel(n_jobs=-1, verbose=0)( [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action) for action in list_boards] ) for survivable, my_action in processed: if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] """ for my_action in list_boards: survivable = search_time_expanded_network(list_boards[my_action][1:], my_next_position[my_action]) if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] survivable_actions = list() for a in is_survivable: if not is_survivable[a]: continue if might_blocked[a] and not is_survivable[constants.Action.Stop]: continue if n_survivable[a][-1] <= 1: is_survivable[a] = False continue survivable_actions.append(a) # # Choose action # if len(survivable_actions) == 0: # # return None, if no survivable actions # return None elif len(survivable_actions) == 1: # 
# Choose the survivable action, if it is the only choice # action = survivable_actions[0] if verbose: print("The only survivable action", action) return action.value # # Bomb at a target # # fraction of blocked node in the survival trees of enemies _list_boards = deepcopy(info["list_boards_no_move"]) if obs["bomb_blast_strength"][my_position]: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Bomb.value else: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Passage.value total_frac_blocked, n_survivable_nodes, blocked_time_positions \ = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"]) if teammate_position is not None: total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \ = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"]) """ np.set_printoptions(precision=3) print("enemy") print(total_frac_blocked) print("teammate") print(total_frac_blocked_teammate) print("product") prod = total_frac_blocked * (1 - total_frac_blocked_teammate) print(prod[:5,:5]) """ p_survivable = defaultdict(float) for action in n_survivable: p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold if p_survivable[action] > 1: p_survivable[action] = 1 block = defaultdict(float) for action in [constants.Action.Stop, constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = my_next_position[action] if next_position is None: continue if next_position in set.union(kickable, might_kickable): # kick will be considered later continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): # if the next position is flames, # I want to stop to wait, which must be feasible block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop] if 
teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) continue elif not is_survivable[action]: continue if all([might_blocked[action], not is_survivable[constants.Action.Stop]]): continue block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if might_blocked[action]: block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop] + total_frac_blocked[next_position] * p_survivable[action]) / 2 if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if is_survivable[constants.Action.Bomb]: list_boards_with_bomb, _ \ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=constants.Action.Bomb) n_survivable_nodes_with_bomb = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_bomb, enemy_position) n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable]) n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked_with_bomb = 0 # place more bombs, so the stacked enemy cannot kick x, y = my_position for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]: next_position = (x + dx, y + dy) following_position = (x + 2 * dx, y + 2 * dy) if not 
self._on_board(following_position): continue if all([obs["bomb_life"][next_position] > 0, board[following_position] > constants.Item.AgentDummy.value]): total_frac_blocked_with_bomb = 1 else: total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none if teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position) n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable]) n_with_bomb = n_survivable_nodes_with_bomb_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_with_bomb_teammate = 0 else: total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none action = constants.Action.Bomb block[action] = total_frac_blocked_with_bomb * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_with_bomb_teammate) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) for next_position in kickable: action = self._get_direction(my_position, next_position) if not is_survivable[action]: continue list_boards_with_kick, _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_action=action, can_kick=True) n_survivable_nodes_with_kick = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_kick, enemy_position) n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable]) n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked[next_position] = 0 else: total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none if 
teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position) n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable]) n_with_kick = n_survivable_nodes_with_kick_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_teammate[next_position] = 0 else: total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) max_block = -np.inf best_action = None for action in block: if block[action] > max_block: max_block = block[action] best_action = action if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp: if teammate_position is not None: teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_with_bomb_teammate < self._interfere_threshold, total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if teammate_ok: if best_action == constants.Action.Bomb: if verbose: print("Bomb is best", constants.Action.Bomb) return constants.Action.Bomb.value if best_action == constants.Action.Stop: if verbose: print("Place a bomb at a locally optimal position", constants.Action.Bomb) return constants.Action.Bomb.value # # Move towards where to bomb # if best_action not in [None, constants.Action.Bomb]: next_position = my_next_position[best_action] should_chase = (total_frac_blocked[next_position] > self._chase_threshold) if teammate_position is not None: teammate_safety = 
total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate] if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_teammate[next_position] < self._interfere_threshold, total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if should_chase and teammate_ok: if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): action = constants.Action.Stop if verbose: print("Wait flames life", action) return action.value else: if verbose: print("Move towards better place to bomb", best_action) return best_action.value # Exclude the action representing stop to wait max_block = -np.inf best_action = None for action in survivable_actions: if block[action] > max_block: max_block = block[action] best_action = action # # Do not take risky actions # most_survivable_action = self._action_most_survivable(n_survivable) # ignore actions with low survivability _survivable_actions = list() for action in n_survivable: n = sum(n_survivable[action]) if not is_survivable[action]: continue elif n > self._my_survivability_threshold: _survivable_actions.append(action) else: print("RISKY", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: if verbose: print("Take the best action in danger", best_action) return best_action.value else: # Take the most survivable action if verbose: print("Take the most survivable action", most_survivable_action) return most_survivable_action.value # # Do not interfere with teammate # if all([teammate_position is not None, len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]): # ignore actions that interfere with teammate min_interfere = np.inf least_interfere_action = None _survivable_actions = list() for action in survivable_actions: if 
action == constants.Action.Bomb: frac = total_frac_blocked_with_bomb_teammate else: next_position = my_next_position[action] frac = total_frac_blocked_teammate[next_position] if frac < min_interfere: min_interfere = frac least_interfere_action = action if frac < self._interfere_threshold: _survivable_actions.append(action) else: print("INTERFERE", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: # Take the least interfering action if verbose: print("Take the best action in intereference", best_action) return best_action.value else: if verbose: print("Take the least interfering action", least_interfere_action) return least_interfere_action.value consider_bomb = True if not is_survivable[constants.Action.Bomb]: consider_bomb = False # # Find reachable items # # List of boards simulated list_boards, _ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards)): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[t][flame_positions] = constants.Item.Flames.value # List of the set of survivable time-positions at each time # and preceding positions survivable, prev, _, _ = self._search_time_expanded_network(list_boards, my_position) if len(survivable[-1]) == 0: survivable = [set() for _ in range(len(survivable))] # Items and bomb target that can be reached in a survivable manner _, _, next_to_items \ = self._find_reachable_items(list_boards, my_position, survivable) # # If I have seen an enemy recently and cannot see him now, them move to the last seen position # action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, 
my_enemies) if action is not None: if verbose: print("Moving toward last seen enemy", action) return action.value # # If I have seen a teammate recently, them move away from the last seen position # action = self._action_away_from_teammate(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, my_teammate) if action is not None: if verbose: print("Moving away from last seen teammate", action) return action.value # # Move towards a fog where we have not seen longest # action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info) if action is not None: #if True: if self.random.uniform() < 0.8: if verbose: print("Moving toward oldest fog", action) return action.value # # Choose most survivable action # max_block = -np.inf best_action = None for action in survivable_actions: if action == constants.Action.Bomb: continue if block[action] > max_block: max_block = block[action] best_action = action if verbose: print("Take the best action among safe actions (nothing else to do)", best_action) if best_action is None: # this should not be the case return None else: return best_action.value
def _kickable_positions(self, obs, moving_direction, consider_agents=True):
    """
    Find the neighboring positions where I would kick a bomb by moving there.

    Two cases are handled:
    a static bomb that already sits on the neighboring position, and
    a moving bomb that is heading into the neighboring position.

    Parameters
    ----------
    obs : dict
        pommerman observation
        (reads "can_kick", "position", "board", and "bomb_life")
    moving_direction : array
        moving_direction[position] is compared against constants.Action.*;
        None is treated as "the bomb at position is not moving"
    consider_agents : bool
        if True, a static bomb is not regarded as kickable when an agent
        stands diagonal to me and next to the bomb, or diagonal to the bomb
        and next to the position the bomb would be kicked into

    Return
    ------
    kickable : set
        set of neighboring positions (tuples) where a bomb can be kicked;
        empty if obs["can_kick"] is false
    """

    if not obs["can_kick"]:
        return set()

    def is_adjacent(a, b):
        # True if a and b are at Manhattan distance 1
        return abs(a[0] - b[0]) + abs(a[1] - b[1]) == 1

    kickable = set()

    # my position (as a tuple, so that it can be compared with the
    # position tuples built below even if obs gives a list or an array)
    my_position = tuple(obs["position"])
    x, y = my_position

    # Find neighboring positions around me
    on_board_next_positions = list()
    for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        next_position = (x + dx, y + dy)
        if self._on_board(next_position):
            on_board_next_positions.append(next_position)

    # Check if can kick a static bomb
    for next_position in on_board_next_positions:
        if obs["board"][next_position] != constants.Item.Bomb.value:
            # not a bomb
            continue
        if moving_direction[next_position] is not None:
            # moving
            continue
        if obs["bomb_life"][next_position] <= 1:
            # kick and die
            continue
        # the position right behind the bomb, seen from me
        following_position = (2 * next_position[0] - x,
                              2 * next_position[1] - y)
        if not self._on_board(following_position):
            # cannot kick to that direction
            continue
        if not utility.position_is_passage(obs["board"], following_position):
            # cannot kick to that direction
            continue

        might_blocked = False
        if consider_agents:
            # neighboring agent might block (or change the direction) immediately
            for dx, dy in [(-1, -1), (1, -1), (-1, 1), (1, 1)]:
                neighboring_position = (x + dx, y + dy)
                if not self._on_board(neighboring_position):
                    continue
                if not is_adjacent(neighboring_position, next_position):
                    continue
                if utility.position_is_agent(obs["board"], neighboring_position):
                    print("agent is blocking at", neighboring_position)
                    might_blocked = True
                    break
            if might_blocked:
                continue
            for dx, dy in [(-1, -1), (1, -1), (-1, 1), (1, 1)]:
                neighboring_position = (next_position[0] + dx,
                                        next_position[1] + dy)
                if not self._on_board(neighboring_position):
                    continue
                if not is_adjacent(neighboring_position, following_position):
                    continue
                if utility.position_is_agent(obs["board"], neighboring_position):
                    print("agent is blocking at", neighboring_position)
                    might_blocked = True
                    break
            if might_blocked:
                continue

        print("can kick a static bomb at", next_position)
        kickable.add(next_position)

    # Check if can kick a moving bomb
    for next_position in on_board_next_positions:
        if next_position in kickable:
            # can kick a static bomb
            continue
        nx, ny = next_position
        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            coming_position = (nx + dx, ny + dy)
            if coming_position == my_position:
                # cannot come from my position
                continue
            if not self._on_board(coming_position):
                # cannot come from out of board
                continue
            direction = moving_direction[coming_position]
            if direction == constants.Action.Up and (dx, dy) == (1, 0):
                # coming from below
                print("can kick a moving bomb coming from below at", next_position)
                kickable.add(next_position)
                break
            if direction == constants.Action.Down and (dx, dy) == (-1, 0):
                # coming from above
                print("can kick a moving bomb coming from", coming_position, "above to", next_position)
                kickable.add(next_position)
                break
            if direction == constants.Action.Right and (dx, dy) == (0, -1):
                # coming from left
                print("can kick a moving bomb coming from left at", next_position)
                kickable.add(next_position)
                break
            if direction == constants.Action.Left and (dx, dy) == (0, 1):
                # coming from right
                print("can kick a moving bomb coming from right at", next_position)
                # this position used to be reported but never recorded
                kickable.add(next_position)
                break

    return kickable
def _get_bombs(self, board, bomb_blast_strength, prev_bomb_blast_strength, bomb_life, prev_bomb_life):
    """
    Summarize information about bombs

    Bombs that had more than one step of life left at the previous step and
    whose position is now fog are kept alive: `bomb_life` and
    `bomb_blast_strength` are updated IN PLACE at those positions
    (life decremented by one, blast strength carried over).

    Parameters
    ----------
    board : array
    bomb_blast_strength : array
        modified in place for bombs under fog
    prev_bomb_blast_strength : array
        blast strength of bombs at the previous step
    bomb_life : array
        modified in place for bombs under fog
    prev_bomb_life : array
        remaining life of bombs at the previous step

    Return
    ------
    curr_bombs : list
        list of characters.Bomb, one for each position with bomb_life > 0
    moving_direction : array
        array of shape self.board_shape
        moving_direction[position] : direction of bomb at position
        (None where no direction was identified)
    """

    # Keep bombs under fog
    fog_value = constants.Item.Fog.value
    hidden = np.where(np.logical_and(prev_bomb_life > 1, board == fog_value))
    bomb_life[hidden] = prev_bomb_life[hidden] - 1
    bomb_blast_strength[hidden] = prev_bomb_blast_strength[hidden]

    # Change of remaining life at each position:
    #   0 : no bomb -> no bomb
    #   1 : a bomb stayed and its life is decremented
    #  -9 : no bomb -> new bomb
    # anything else is regarded as a bomb having moved out of / into the position
    life_drop = prev_bomb_life - bomb_life
    is_moving = ~np.isin(life_drop, (0, 1, -9))

    # previous positions of moving bombs (order matters for matching below)
    rows, cols = np.where(is_moving & (life_drop > 0))
    origins = list(zip(rows, cols))

    # current positions of moving bombs
    rows, cols = np.where(is_moving & (life_drop < 0))
    destinations = set(zip(rows, cols))

    # TODO : Consider bombs moving into fog

    moving_direction = np.full(self.board_shape, None)
    rows, cols = np.where(bomb_life > 0)
    curr_bombs = []
    for position in zip(rows, cols):
        life_now = bomb_life[position]

        if position in destinations:
            # then the bomb is moving, so find the moving direction
            for origin in origins:
                # the bomb must have had exactly one more step of life
                # at the position it came from
                if prev_bomb_life[origin] != life_now + 1:
                    continue

                dx = position[0] - origin[0]
                dy = position[1] - origin[1]
                distance = abs(dx) + abs(dy)

                if distance == 2:
                    # this can be a moving bomb whose direction is changed by kick;
                    # look for the agent at each of the two candidate positions
                    kicker = None
                    for candidate in ((origin[0] + dx, origin[1]),
                                      (origin[0], origin[1] + dy)):
                        if utility.position_is_agent(board, candidate):
                            kicker = candidate
                            break
                    if kicker is None:
                        continue
                    # the agent must have kicked
                    print("agent must have kicked at", kicker)
                    moving_direction[position] = self._get_direction(
                        kicker, position)
                    break

                if distance == 1:
                    # TODO: there might be multiple possibilities of
                    # where the bomb came from
                    moving_direction[position] = self._get_direction(
                        origin, position)
                    break

        curr_bombs.append(
            characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                life_now,
                int(bomb_blast_strength[position]),
                moving_direction[position]))

    return curr_bombs, moving_direction
def act(self, obs, action_space, info):
    """
    Choose the action to take at this step.

    The method returns as soon as one stage yields an action.
    The stages are tried in this order:
    1. no survivable action (return None) / exactly one survivable action
    2. place a bomb, when its score exceeds the bomb threshold and the
       teammate check passes
    3. move towards a position whose blocking fraction exceeds the chase
       threshold
    4. drop actions with low survivability, then actions that interfere
       with the teammate; return early when at most one action remains
    5. move to the last seen enemy / away from the last seen teammate /
       escape from collapse / towards fog
    6. the best non-bomb action by score, then by p_survivable

    NOTE(review): the line structure of this method was lost in the file;
    the nesting below was reconstructed from the statement order and from
    where each variable is defined -- confirm against version control.

    Parameters
    ----------
    obs : dict
        pommerman observation; keys read here: "position", "ammo",
        "blast_strength", "enemies", "teammate", "can_kick",
        "bomb_blast_strength", "bomb_life" (obs is also passed on to
        self._get_survivable)
    action_space
        not used in this method
    info : dict
        keys read here: "recently_seen", "teammate_position",
        "enemy_positions", "my_next_position", "all_kickable", "kickable",
        "step_to_collapse", "collapse_ring", "might_block_actions",
        "might_blocked", "prev_action", "prev_position",
        "list_boards_no_move", "flame_life", "curr_bombs", "curr_flames",
        "agent_blast_strength", and optionally "escape"

    Return
    ------
    the `.value` of the chosen action, or None if no action is chosen
    """

    #
    # Definitions
    #

    #board = obs['board']
    board = info["recently_seen"]
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have (not used below)
    my_blast_strength = obs['blast_strength']
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None
    my_kick = obs["can_kick"]  # whether I can kick (not used below)

    #
    # Understand current situation
    #

    # positions that might be blocked
    if info["teammate_position"] is None:
        agent_positions = info["enemy_positions"]
    else:
        agent_positions = info["enemy_positions"] + [
            info["teammate_position"]
        ]

    # survivable actions
    # (enemy mobility is taken into account only when an enemy position is known)
    if len(info["enemy_positions"]) > 0:
        mobility = self._enemy_mobility
    else:
        mobility = 0

    n_survivable, is_survivable, list_boards \
        = self._get_survivable(obs, info, my_position, info["my_next_position"], agent_positions,
                               info["all_kickable"], allow_kick_to_fog=False,
                               enemy_mobility=mobility, enemy_bomb=self._enemy_bomb,
                               step_to_collapse=info["step_to_collapse"],
                               collapse_ring=info["collapse_ring"])

    # actions listed in info["might_block_actions"] are treated as not survivable
    for a in info["might_block_actions"]:
        n_survivable[a] = np.zeros(self._search_range)
        is_survivable[a] = False

    survivable_actions = list()
    for a in is_survivable:
        if not is_survivable[a]:
            continue
        if info["might_blocked"][a] and not is_survivable[
                constants.Action.Stop]:
            # if I get blocked I end up staying here, so Stop must be survivable too
            continue
        if n_survivable[a][-1] <= 1:
            # too few survivable nodes at the last step of the search
            is_survivable[a] = False
            continue
        survivable_actions.append(a)

    #
    # Choose action
    #

    if len(survivable_actions) == 0:

        #
        # return None, if no survivable actions
        #

        return None

    elif len(survivable_actions) == 1:

        #
        # Choose the survivable action, if it is the only choice
        #

        action = survivable_actions[0]
        return action.value

    if all([
            info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
            info["prev_position"] == my_position
    ]):
        # if previously blocked, do not reapeat with some probability
        # (the scores below are scaled by _inv_tmp before noise is added)
        self._inv_tmp *= self._backoff
    else:
        self._inv_tmp = self._inv_tmp_init

    #
    # Bomb at a target
    #

    # fraction of blocked node in the survival trees of enemies
    # NOTE: _list_boards is the same list object as info["list_boards_no_move"],
    # so the assignments below are also visible through info
    _list_boards = info["list_boards_no_move"]
    if obs["bomb_blast_strength"][my_position]:
        # there is a bomb under me: replace myself with the bomb
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        # otherwise replace myself with a passage
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, blocked_time_positions \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    if info["teammate_position"] is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
            = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        if n_survivable_nodes_teammate[my_teammate] > 0:
            # fractions below LB are zeroed, i.e., treated as not blocking the teammate
            LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                my_teammate]
            positions_teammate_safe = np.where(
                total_frac_blocked_teammate < LB)
            total_frac_blocked_teammate[positions_teammate_safe] = 0

    # total number of survivable nodes relative to the threshold, capped at 1
    p_survivable = defaultdict(float)
    for action in n_survivable:
        p_survivable[action] = sum(
            n_survivable[action]) / self._my_survivability_threshold
        if p_survivable[action] > 1:
            p_survivable[action] = 1

    # score of each action; actions that are never assigned read as 0.0
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = info["my_next_position"][action]
        if next_position is None:
            continue
        if next_position in info["all_kickable"]:
            # kick will be considered later
            continue
        if all([
                utility.position_is_flames(board, next_position),
                info["flame_life"][next_position] > 1,
                is_survivable[constants.Action.Stop]
        ]):
            # if the next position is flames,
            # I want to stop to wait, which must be feasible
            block[action] = total_frac_blocked[
                next_position] * p_survivable[constants.Action.Stop]
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                # scale by _inv_tmp and add -log(-log(u)) noise;
                # a zero score is left exactly zero
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))
            continue
        elif not is_survivable[action]:
            continue
        if all([
                info["might_blocked"][action],
                not is_survivable[constants.Action.Stop]
        ]):
            continue

        block[action] = total_frac_blocked[next_position] * p_survivable[
            action]
        if info["teammate_position"] is not None:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        if block[action] > 0:
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

        if info["might_blocked"][action]:
            # overwrite with the average of staying here and reaching next_position
            block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                             + total_frac_blocked[next_position] * p_survivable[action]) / 2
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

    if is_survivable[constants.Action.Bomb]:
        # boards simulated with my bomb placed now
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        n_survivable_nodes_with_bomb = defaultdict(int)
        for enemy_position in info["enemy_positions"]:
            # get survivable tree of the enemy
            _survivable = search_time_expanded_network(
                list_boards_with_bomb, enemy_position)
            n_survivable_nodes_with_bomb[enemy_position] = sum(
                [len(positions) for positions in _survivable])

        n_with_bomb = sum([
            n_survivable_nodes_with_bomb[enemy_position]
            for enemy_position in info["enemy_positions"]
        ])
        n_with_none = sum(
            [n_survivable_nodes[enemy] for enemy in my_enemies])
        if n_with_none == 0:
            total_frac_blocked_with_bomb = 0

            # place more bombs, so the stacked enemy cannot kick
            x, y = my_position
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                next_position = (x + dx, y + dy)
                following_position = (x + 2 * dx, y + 2 * dy)
                if not self._on_board(following_position):
                    continue
                if all([
                        obs["bomb_life"][next_position] > 0,
                        board[following_position] > constants.Item.AgentDummy.value
                ]):
                    total_frac_blocked_with_bomb = 1
        else:
            # fraction of the enemies' survivable nodes that my bomb removes
            total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

        action = constants.Action.Bomb
        block[action] = total_frac_blocked_with_bomb
        # block[action] += total_frac_blocked[my_position] * (eisenachAgents - total_frac_blocked_with_bomb)
        block[action] *= p_survivable[action]

        block_teammate_with_bomb = None
        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

                block_teammate_with_bomb \
                    += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                block[action] *= (1 - block_teammate_with_bomb)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    for next_position in info["kickable"]:

        action = self._get_direction(my_position, next_position)
        if not is_survivable[action]:
            continue

        # boards simulated with me kicking the bomb at next_position
        list_boards_with_kick, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=action,
                                   can_kick=True,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        block[action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies,
                                               ignore_dying_agent=True)
        block[action] += total_frac_blocked[next_position] * (
            1 - block[action])
        block[action] *= p_survivable[action]

        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_kick \
                    = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)
                block_teammate_with_kick \
                    += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                block[action] *= (1 - block_teammate_with_kick)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:

        if info["teammate_position"] is not None:
            if block_teammate_with_bomb is None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

            teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                my_teammate]
            # the teammate check passes if any one of the three conditions holds
            if any([
                    teammate_safety > self._teammate_survivability_threshold,
                    block_teammate_with_bomb < self._interfere_threshold,
                    block_teammate_with_bomb < total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if teammate_ok:
            if best_action == constants.Action.Bomb:
                return constants.Action.Bomb.value

            if best_action == constants.Action.Stop:
                # staying here scores best, so place the bomb here
                return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = info["my_next_position"][best_action]

        should_chase = (total_frac_blocked[next_position] >
                        self._chase_threshold)

        if info["teammate_position"] is not None:
            teammate_safety = total_frac_blocked_teammate[
                next_position] * n_survivable_nodes_teammate[my_teammate]
            if any([
                    teammate_safety > self._teammate_survivability_threshold,
                    total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                    total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if should_chase and teammate_ok:
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                # the target position is still in flames: stop and wait
                action = constants.Action.Stop
                return action.value
            else:
                return best_action.value

    # Exclude the action representing stop to wait
    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in survivable_actions:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    #
    # Do not take risky actions when not interacting with enemies
    #

    most_survivable_action = self._action_most_survivable(n_survivable)

    if total_frac_blocked[my_position] > 0:

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            return best_action.value
        else:
            # Take the most survivable action
            return most_survivable_action.value

    #
    # Do not interfere with teammate
    #

    if all([
            info["teammate_position"] is not None,
            len(info["enemy_positions"]) > 0 or len(info["curr_bombs"]) > 0
    ]):

        # ignore actions that interfere with teammate
        min_interfere = np.inf
        least_interfere_action = None
        _survivable_actions = list()
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                # Bomb is skipped here; the string below is disabled code
                """
                if block_teammate_with_bomb is None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)
                frac = block_teammate_with_bomb
                """
                continue
            else:
                next_position = info["my_next_position"][action]
                frac = total_frac_blocked_teammate[next_position]
            if frac < min_interfere:
                min_interfere = frac
                least_interfere_action = action
            if frac < self._interfere_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            # Take the least interfering action
            return best_action.value
        else:
            return least_interfere_action.value

    # consider_bomb is assigned here but not read again in this method
    consider_bomb = True
    if not is_survivable[constants.Action.Bomb]:
        consider_bomb = False

    #
    # Find reachable items
    #

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=mobility,
        enemy_bomb=self._enemy_bomb,
        enemy_positions=agent_positions,
        agent_blast_strength=info["agent_blast_strength"],
        step_to_collapse=info["step_to_collapse"],
        collapse_ring=info["collapse_ring"])

    # some bombs may explode with extra bombs, leading to under estimation
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, _, _ = self._search_time_expanded_network(
        list_boards, my_position)
    if len(survivable[-1]) == 0:
        # nothing survives until the last step: treat every step as not survivable
        survivable = [set() for _ in range(len(survivable))]

    # Items and bomb target that can be reached in a survivable manner
    if "escape" in info:
        reachable_items, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         might_powerup=info["escape"])  # might_powerup is the escape from collapse
    else:
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

    #
    # If I have seen an enemy recently and cannot see him now, them move to the last seen position
    #

    action = self._action_to_enemy(my_position,
                                   next_to_items[constants.Item.Fog], prev,
                                   is_survivable, info)
    if action is not None:
        return action.value

    #
    # If I have seen a teammate recently, them move away from the last seen position
    #

    action = self._action_away_from_teammate(
        my_position, next_to_items[constants.Item.Fog], prev,
        is_survivable, info)
    if action is not None:
        return action.value

    #
    # Move to the places that will not be collapsed
    #

    if "escape" in info:
        # might_powerup is the escape from collapse
        action = self._action_to_might_powerup(my_position, reachable_items,
                                               prev, is_survivable)
        if action is not None:
            print("Escape from collapse", action)
            return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    action = self._action_to_fog(my_position,
                                 next_to_items[constants.Item.Fog], prev,
                                 is_survivable, info)
    if action is not None:
        #if True:
        if self.random.uniform() < 0.8:
            return action.value

    #
    # Choose most survivable action
    #

    max_block = 0
    best_action = None
    for action in survivable_actions:
        if action == constants.Action.Bomb:
            continue
        score = block[action]
        if action != constants.Action.Bomb:
            # small noise from the global numpy RNG (not self.random)
            score += np.random.uniform(0, 1e-3)
        if score > max_block:
            max_block = score
            best_action = action

    if best_action is None:
        # fall back to the action with the largest p_survivable
        max_p = 0
        best_action = None
        for action in p_survivable:
            score = p_survivable[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_p:
                max_p = score
                best_action = action

    if best_action is None:
        # this should not be the case
        return None
    else:
        return best_action.value
def act(self, obs, action_space, info):
    """
    Choose the next action through a fixed cascade of heuristics.

    The method simulates future boards, determines which actions are
    survivable, and then returns the first decision that fires, in
    this order:

    1. the only survivable action, if there is exactly one;
    2. Bomb, if no other action has a larger sorted survivability count;
    3. Bomb / move according to ``total_frac_blocked`` (see the
       NOTE(review) in the "Place a bomb" section);
    4. move toward the best reachable ``"target"`` time-position;
    5. kick a bomb if doing so reduces an enemy's survivable nodes;
    6. move toward the fog that has not been seen for the longest time;
    7. otherwise the action with the largest total survivability count.

    Parameters
    ----------
    obs : dict
        Observation.  Keys read here: 'board', 'position', 'ammo',
        'blast_strength', 'enemies', 'teammate', 'can_kick',
        'bomb_blast_strength', 'bomb_life'.
    action_space
        Not used in this method.
    info : dict
        Extra state.  Keys read here: 'list_boards_no_move',
        'curr_bombs', 'curr_flames', 'moving_direction',
        'since_last_seen'.

    Returns
    -------
    The ``.value`` of the selected action, or None when there is no
    survivable action at all.

    Raises
    ------
    ValueError
        If a survivable action is none of Bomb/Up/Down/Left/Right/Stop.
    """

    #
    # Definitions
    #

    # mobility assumed for enemies when simulating future boards
    enemy_mobility = 4

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    # NOTE(review): my_teammate is assigned but never read below
    my_teammate = obs["teammate"]
    my_kick = obs["can_kick"]  # whether I can kick

    print("my position", my_position,
          "ammo", my_ammo,
          "blast", my_blast_strength,
          "kick", my_kick,
          end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies
    # Work on a copy so that info["list_boards_no_move"] is not modified.
    # On every copied board where my position still shows an agent,
    # replace it by a Bomb if there is a bomb under me, else by a Passage.
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    # total_frac_blocked is indexed by position below;
    # n_survivable_nodes is indexed by enemy in the kick section.
    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    # presumably a boolean mask of positions worth bombing -- TODO confirm
    bomb_target_enemy = (total_frac_blocked > 0)

    # List of boards simulated
    list_boards, _ = self._board_sequence(board,
                                          info["curr_bombs"],
                                          info["curr_flames"],
                                          self._search_range,
                                          my_position,
                                          enemy_mobility=enemy_mobility)

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards,
                                             my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=enemy_mobility)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    # n_survivable[action]: list of counts used to score each action
    n_survivable = dict()
    # moves that kick a bomb and were found survivable
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog
            # Walk from the bomb in the kick direction until leaving the
            # board; skip this kick if any cell on the way is fog.
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            # next_position is rebound here to the second value returned
            # by _board_sequence for this kicking move
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=enemy_mobility)
            #print(list_boards_with_kick)
            # The search starts from the boards after the first one.
            # NOTE(review): prev_kick and succ_kick are never read.
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable_with_kick[1:]]
                kick_actions.append(my_action)
    else:
        # NOTE(review): kickable is not read again in this method
        kickable = set()

    # No survivable action at all: give up and return None
    if len(survivable_actions) == 0:
        return None

    #
    # bomb target that can be reached in a survivable manner
    #

    # NOTE(review): `reached` is never read below
    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable,
                                     bomb_target_enemy)

    #
    # Evaluate the survivability of each action
    #

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [len(s) for s in survivable_with_bomb[1:]]
            continue

        # Up/Down change the first coordinate, Left/Right the second
        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        # NOTE(review): survivable_actions also contains the kick actions
        # appended above, so this overwrites the n_survivable entry that
        # was computed from the kick simulation -- confirm this is intended.
        n_survivable[action], _info = self._count_survivable(succ, 1, next_position)

    # NOTE(review): `verbose` is not bound in this method; presumably a
    # module-level flag -- confirm it is defined elsewhere in the file.
    #if True:
    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    #
    # Bomb if it has dominating survivability
    #

    if is_survivable[constants.Action.Bomb]:
        bomb_is_most_survivable = True
        # Compare the sorted count lists element by element: Bomb
        # dominates unless some other action is strictly larger
        # at some rank.
        bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
        for action in n_survivable:
            if action == constants.Action.Bomb:
                continue
            action_sorted = np.array(sorted(n_survivable[action]))
            if any(action_sorted > bomb_sorted):
                bomb_is_most_survivable = False
                break
        if bomb_is_most_survivable:
            action = constants.Action.Bomb
            print("Bomb to survive", action)
            return action.value

    #
    # Bomb at a target
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False

    #
    # Place a bomb
    #

    # Find the survivable action (other than Stop) whose next position
    # has the largest positive total_frac_blocked.
    best_action = None
    max_block = 0
    for action in survivable_actions:
        if action == constants.Action.Stop:
            continue
        next_position = self._get_next_position(my_position, action)
        block = total_frac_blocked[next_position]
        if block > max_block:
            max_block = block
            best_action = action

    # NOTE(review): the loop above skips Stop, so best_action can never
    # equal Stop here; this branch (the only reader of consider_bomb)
    # looks unreachable -- confirm whether skipping Stop is intended.
    if consider_bomb and best_action == constants.Action.Stop:
        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Stop]:
        print("Move towards better place to bomb", best_action)
        return best_action.value

    good_time_positions = reachable_items["target"]
    if good_time_positions:
        # Score each (t, x, y) by the blocked fraction at (x, y),
        # divided by t+1.
        score = [total_frac_blocked[(x,y)] / (t+1) for t, x, y in good_time_positions]
        # indices of all time-positions tied for the best score
        argmax = np.argwhere(score==np.max(score))
        best_time_positions = [good_time_positions[i[0]] for i in argmax]
        action = self._find_distance_minimizer(my_position,
                                               best_time_positions,
                                               #good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        # position of the bomb that my_action would kick
        if my_action == constants.Action.Up:
            next_position = (my_position[0] - 1, my_position[1])
        elif my_action == constants.Action.Down:
            next_position = (my_position[0] + 1, my_position[1])
        elif my_action == constants.Action.Right:
            next_position = (my_position[0], my_position[1] + 1)
        elif my_action == constants.Action.Left:
            next_position = (my_position[0], my_position[1] - 1)

        # do not kick a bomb if it will break enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               my_enemies):
                continue

        # Simulate the kick with enemy_mobility=0
        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        for enemy in my_enemies:
            # locate the enemy on the current board; skip if not found
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                     enemy_position)

            # Kick as soon as it lowers this enemy's survivable nodes
            # below the count obtained from _get_frac_blocked.
            n_survivable_nodes_with_kick = sum([len(positions) for positions in _survivable])
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->", n_survivable_nodes_with_kick,
                      my_action)
                return my_action.value

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    # x and y are rebound by this loop (no longer my_position)
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x+dx, y+dy) for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        # largest since_last_seen value among the on-board neighbors
        age = max([info["since_last_seen"][position] for position in neighbors if self._on_board(position)])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    # score of an action = sum of its n_survivable counts
    survivable_score = dict()
    for action in n_survivable:
        # survivable_score[action] = sum([-n**(-5) for n in n_survivable[action]])
        survivable_score[action] = sum([n for n in n_survivable[action]])
        if verbose:
            print(action, survivable_score[action], n_survivable[action])

    best_survivable_score = max(survivable_score.values())

    # Shuffle first so that ties for the best score are broken at random
    most_survivable_action = None
    random.shuffle(survivable_actions)
    for action in survivable_actions:
        if survivable_score[action] == best_survivable_score:
            most_survivable_action = action
            break

    print("Most survivable action", most_survivable_action)
    return most_survivable_action.value