def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None): assert (depth is not None) if exclude is None: exclude = [ constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames ] def out_of_range(p_1, p_2): '''Determines if two points are out of rang of each other''' x_1, y_1 = p_1 x_2, y_2 = p_2 return abs(y_2 - y_1) + abs(x_2 - x_1) > depth items = defaultdict(list) dist = {} prev = {} Q = queue.PriorityQueue() my_x, my_y = my_position for r in range(max(0, my_x - depth), min(len(board), my_x + depth)): for c in range(max(0, my_y - depth), min(len(board), my_y + depth)): position = (r, c) if any([ out_of_range(my_position, position), utility.position_in_items(board, position, exclude), ]): continue if position == my_position: dist[position] = 0 else: dist[position] = np.inf prev[position] = None Q.put((dist[position], position)) for bomb in bombs: if bomb['position'] == my_position: items[constants.Item.Bomb].append(my_position) while not Q.empty(): _, position = Q.get() if utility.position_is_passable(board, position, enemies): x, y = position val = dist[(x, y)] + 1 for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]: new_position = (row + x, col + y) if new_position not in dist: continue if val < dist[new_position]: dist[new_position] = val prev[new_position] = position item = constants.Item(board[position]) items[item].append(position) return items, dist, prev
def is_stuck_direction(next_position, bomb_range, next_board, enemies):
    """Tell whether every cell reachable from ``next_position`` is a dead end.

    Cells are explored outward from ``next_position`` through on-board,
    passable neighbours, closest (Manhattan distance) first.  The answer is
    ``False`` as soon as a popped cell shares neither coordinate with
    ``next_position`` or was queued with a distance above ``bomb_range``;
    it is ``True`` when the search runs dry without finding such a cell.
    """
    frontier = queue.PriorityQueue()
    frontier.put((0, next_position))
    start_x, start_y = next_position
    visited = set()

    while not frontier.empty():
        travelled, current = frontier.get()
        visited.add(current)
        cur_x, cur_y = current

        # Off both axes of the start cell, or further away than the range.
        if (start_x != cur_x and start_y != cur_y) or travelled > bomb_range:
            return False

        for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            neighbour = (d_row + cur_x, d_col + cur_y)
            if neighbour in visited:
                continue
            if not utility.position_on_board(next_board, neighbour):
                continue
            if not utility.position_is_passable(next_board, neighbour,
                                                enemies):
                continue
            gap = abs(neighbour[0] - start_x) + abs(neighbour[1] - start_y)
            frontier.put((gap, neighbour))

    return True
def step_to(obs, new_position, lay_bomb=False):
    """Return a deep copy of ``obs`` after the agent moved to ``new_position``.

    With ``lay_bomb`` set the change covers two steps: a bomb is laid on the
    current cell first, then the agent moves.  ``obs`` itself is not modified.

    The copy gets an updated ``'position'`` and ``'board'``; when a bomb is
    laid, ``'bomb_blast_strength'`` and ``'bomb_life'`` are set on the vacated
    cell.  Every bomb life of at least 2 is then reduced by the number of
    elapsed steps, but never below 1.
    """
    assert (utility.position_is_passable(obs['board'], new_position,
                                         obs['enemies']))
    new_obs = copy.deepcopy(obs)
    sz = len(obs['board'])
    source_board = obs['board']
    origin = obs['position']

    # What stays behind on the cell we leave: a bomb if we lay one now or if
    # we were already standing on a live bomb, otherwise a plain passage.
    if lay_bomb or obs['bomb_life'][origin] > 0:
        left_behind = constants.Item.Bomb.value
    else:
        left_behind = constants.Item.Passage.value

    new_obs['position'] = new_position
    new_obs['board'][origin] = left_behind
    new_obs['board'][new_position] = source_board[origin]

    if lay_bomb:
        new_obs['bomb_blast_strength'][origin] = obs['blast_strength']
        new_obs['bomb_life'][origin] = constants.DEFAULT_BOMB_LIFE

    elapsed = 2 if lay_bomb else 1
    lives = new_obs['bomb_life']
    for i in range(sz):
        for j in range(sz):
            if lives[i, j] < 2:
                continue
            lives[i, j] = max(1, lives[i, j] - elapsed)
    return new_obs
def _filter_invalid_directions(board, my_position, directions, enemies): ret = [] for direction in directions: position = utility.get_next_position(my_position, direction) if utility.position_on_board( board, position) and utility.position_is_passable( board, position, enemies): ret.append(direction) return ret
def _filter_legal_actions(state):
    """List the actions considered legal in ``state``.

    ``constants.Action.Bomb`` always comes first.  It is followed by each
    entry of ``directions`` whose neighbouring cell is on the board and
    passable.
    """
    origin = tuple(state['position'])
    grid = np.array(state['board'])
    foes = [constants.Item(raw) for raw in state['enemies']]

    def can_enter(cell):
        return (utility.position_on_board(grid, cell) and
                utility.position_is_passable(grid, cell, foes))

    legal = [constants.Action.Bomb]
    # NOTE(review): ``directions`` is not defined inside this function; it is
    # presumably a module-level collection of movement actions -- confirm.
    legal.extend(
        move for move in directions
        if can_enter(utility.get_next_position(origin, move)))
    return legal
def valid_directions(obs):
    """Flag each action in ``dirs`` as enterable (1) or blocked (-1).

    Returns a list of six numbers indexed by ``action.value``.  Slots whose
    action does not appear in ``dirs`` keep their initial 0.
    """
    here = obs['position']
    grid = obs['board']
    foes = obs['enemies']

    flags = [0] * 6
    for move in dirs:
        target = util.get_next_position(here, move)
        enterable = (util.position_on_board(grid, target) and
                     util.position_is_passable(grid, target, foes))
        flags[move.value] = 1 if enterable else -1
    return flags
def is_stuck_direction(next_position, bomb_range, next_board, enemies):
    '''Helper that decides whether moving to ``next_position`` is a dead end.

    Starting at ``next_position`` the reachable cells (on the board and
    passable) are visited nearest first.  The move counts as *not* stuck once
    a visited cell lies off both the row and the column of ``next_position``,
    or was queued with a distance greater than ``bomb_range``.
    '''
    origin_x, origin_y = next_position

    def distance_from_origin(cell):
        return abs(cell[0] - origin_x) + abs(cell[1] - origin_y)

    pending = queue.PriorityQueue()
    pending.put((0, next_position))
    done = set()

    while not pending.empty():
        reached_at, cell = pending.get()
        done.add(cell)
        cell_x, cell_y = cell

        if origin_x != cell_x and origin_y != cell_y:
            return False
        if reached_at > bomb_range:
            return False

        for step_x, step_y in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            candidate = (step_x + cell_x, step_y + cell_y)
            usable = (candidate not in done and
                      utility.position_on_board(next_board, candidate) and
                      utility.position_is_passable(next_board, candidate,
                                                   enemies))
            if usable:
                pending.put((distance_from_origin(candidate), candidate))

    return True
def CanGo(bfs_node, action, enemies):
    """Decide whether taking ``action`` from ``bfs_node`` is allowed and safe.

    The move is rejected when the direction is invalid or the target cell is
    not passable.  It is also rejected when a bomb whose ``life`` is at most
    ``blast_strength + 2`` sits on the same row or column within
    ``blast_strength`` cells of the target, with no wall between them.
    """
    target_x, target_y = utility.get_next_position(bfs_node.my_position,
                                                   action)
    target = (target_x, target_y)
    board = bfs_node.env._board

    if not utility.is_valid_direction(board, target, action):
        return False
    if not utility.position_is_passable(board, target, enemies):
        return False

    def shielded(cells):
        # A single wall anywhere on the stretch is enough cover.
        return any(utility.position_is_wall(board, cell) for cell in cells)

    for bomb in bfs_node.env._bombs:
        bomb_x, bomb_y = bomb.position
        if bomb.life > bomb.blast_strength + 2:
            continue  # not about to explode, ignore it

        if bomb_x == target_x and \
                abs(bomb_y - target_y) <= bomb.blast_strength:
            stretch = ((target_x, y) for y in range(
                min(bomb_y, target_y), max(bomb_y, target_y)))
        elif bomb_y == target_y and \
                abs(bomb_x - target_x) <= bomb.blast_strength:
            stretch = ((x, target_y) for x in range(
                min(bomb_x, target_x), max(bomb_x, target_x)))
        else:
            continue  # bomb is not lined up with the target cell

        if not shielded(stretch):
            return False

    return True
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs,
                          enemies):
    """Pick the directions the agent can take to get away from danger.

    ``unsafe_directions`` maps a direction to a bomb range.  When all four
    directions are unsafe, the first one that is enterable and not a dead end
    is returned on its own, falling back to ``[constants.Action.Stop]``.
    Otherwise the enterable (or fogged) directions that are not unsafe are
    returned.  If there are none, the unsafe directions that stay on the
    board are returned, and as a last resort ``[constants.Action.Stop]``.
    ``bombs`` is accepted but not used.
    """
    neighbours = ((-1, 0), (1, 0), (0, -1), (0, 1))

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''True when nothing reachable from next_position offers an escape.'''
        origin_x, origin_y = next_position
        pending = queue.PriorityQueue()
        pending.put((0, next_position))
        done = set()
        while not pending.empty():
            reached_at, cell = pending.get()
            done.add(cell)
            cell_x, cell_y = cell
            if (origin_x != cell_x and
                    origin_y != cell_y) or reached_at > bomb_range:
                return False
            for step_x, step_y in neighbours:
                candidate = (step_x + cell_x, step_y + cell_y)
                if candidate in done:
                    continue
                if not utility.position_on_board(next_board, candidate):
                    continue
                if not utility.position_is_passable(next_board, candidate,
                                                    enemies):
                    continue
                gap = abs(candidate[0] - origin_x) + abs(
                    candidate[1] - origin_y)
                pending.put((gap, candidate))
        return True

    # All directions are unsafe. Return a position that won't leave us locked.
    if len(unsafe_directions) == 4:
        next_board = board.copy()
        next_board[my_position] = constants.Item.Bomb.value

        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(my_position, direction)
            enterable = (utility.position_on_board(next_board, next_position)
                         and utility.position_is_passable(
                             next_board, next_position, enemies))
            if enterable and not is_stuck_direction(
                    next_position, bomb_range, next_board, enemies):
                # First workable direction wins.
                return [direction]
        return [constants.Action.Stop]

    x, y = my_position
    safe = []
    off_board = []  # The directions that will go off the board.
    for row, col in neighbours:
        position = (x + row, y + col)
        direction = utility.get_direction(my_position, position)

        if not utility.position_on_board(board, position):
            off_board.append(direction)
        elif direction in unsafe_directions:
            continue  # known to be unsafe
        elif utility.position_is_passable(
                board, position, enemies) or utility.position_is_fog(
                    board, position):
            safe.append(direction)

    if safe:
        return safe

    # Nothing safe: settle for anything that at least stays on the board.
    allowed = [k for k in unsafe_directions if k not in off_board]
    if allowed:
        return allowed
    # We don't have ANY directions. So return the stop choice.
    return [constants.Action.Stop]
def position_is_not_passible(board, position, enemies):
    """Negation of ``utility.position_is_passable`` for the same arguments."""
    if utility.position_is_passable(board, position, enemies):
        return False
    return True
def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None):
    """
    Dijkstra method

    Parameters
    ----------
    board = np.array(obs['board'])

    my_position = tuple(obs['position'])

    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))

    enemies = [constants.Item(e) for e in obs['enemies']]

    depth = largest Manhattan distance from my_position that is considered;
        defaults to len(board) * 2, which covers the whole board

    exclude = items whose cells are ignored; defaults to Fog, Rigid, Flames

    Returns
    -------
    items = defaultdict(list), constants.Item -> positions (row-major order)

    dist = dict, position -> number of steps (np.inf when never reached)

    prev = dict, position -> predecessor on the shortest path, or None
    """
    if depth is None:
        depth = len(board) * 2

    if exclude is None:
        exclude = [
            constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
        ]

    def out_of_range(p1, p2):
        x1, y1 = p1
        x2, y2 = p2
        return abs(y2 - y1) + abs(x2 - x1) > depth

    items = defaultdict(list)

    for bomb in bombs:
        if bomb['position'] == my_position:
            items[constants.Item.Bomb].append(my_position)

    dist = {}
    prev = {}

    size = len(board)
    mx, my = my_position
    # ``+ 1`` because range() excludes its end: cells exactly ``depth`` rows
    # or columns away are still in range according to out_of_range().
    for r in range(max(0, mx - depth), min(size, mx + depth + 1)):
        for c in range(max(0, my - depth), min(size, my + depth + 1)):
            position = (r, c)
            if out_of_range(my_position, position) or \
                    utility.position_in_items(board, position, exclude):
                continue

            dist[position] = 0 if position == my_position else np.inf
            prev[position] = None

            item = constants.Item(board[position])
            items[item].append(position)

    # Djikstra
    H = []
    heapq.heappush(H, (0, my_position))
    while H:
        min_dist, position = heapq.heappop(H)

        # Impassable cells can be reached but never walked through.
        if not utility.position_is_passable(board, position, enemies):
            continue

        x, y = position
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            new_position = (row + x, col + y)
            if new_position not in dist:
                continue

            if min_dist + 1 < dist[new_position]:
                dist[new_position] = min_dist + 1
                prev[new_position] = position
                heapq.heappush(H, (dist[new_position], new_position))

    return items, dist, prev
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs,
                          enemies, item):
    """Pick the directions the agent can take to get away from danger.

    ``unsafe_directions`` maps a direction to a bomb range, ``bombs`` is an
    iterable of dicts (``'position'``, ``'bomb_life'``, ``'blast_strength'``)
    and ``item`` maps ``constants.Item`` members to position lists.

    All four directions unsafe: the first enterable direction that is not a
    dead end is returned on its own.  Failing that, if ``item`` holds exactly
    one ``constants.Item(3)`` position and a bomb sits on ``my_position``, the
    enterable unsafe directions are returned; otherwise
    ``[constants.Action.Stop]``.

    Otherwise: the enterable (or fogged) directions that are not unsafe and
    not lined up, within blast strength, with a bomb whose ``'bomb_life'`` is
    1.  If none remain, the unsafe directions that are neither off the board
    nor blocked are returned, and as a last resort ``[constants.Action.Stop]``.
    """
    neighbours = ((-1, 0), (1, 0), (0, -1), (0, 1))

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''True when nothing reachable from next_position offers an escape.'''
        origin_x, origin_y = next_position
        pending = queue.PriorityQueue()
        pending.put((0, next_position))
        done = set()
        while not pending.empty():
            reached_at, cell = pending.get()
            done.add(cell)
            cell_x, cell_y = cell
            if (origin_x != cell_x and
                    origin_y != cell_y) or reached_at > bomb_range:
                return False
            for step_x, step_y in neighbours:
                candidate = (step_x + cell_x, step_y + cell_y)
                if candidate in done:
                    continue
                if not utility.position_on_board(next_board, candidate):
                    continue
                if not utility.position_is_passable(next_board, candidate,
                                                    enemies):
                    continue
                gap = abs(candidate[0] - origin_x) + abs(
                    candidate[1] - origin_y)
                pending.put((gap, candidate))
        return True

    # All directions are unsafe. Return a position that won't leave us locked.
    if len(unsafe_directions) == 4:
        next_board = board.copy()
        next_board[my_position] = constants.Item.Bomb.value
        blocked = []
        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(my_position, direction)
            enterable = (utility.position_on_board(next_board, next_position)
                         and utility.position_is_passable(
                             next_board, next_position, enemies))
            if not enterable:
                blocked.append(direction)
                continue
            if not is_stuck_direction(next_position, bomb_range, next_board,
                                      enemies):
                # First workable direction wins.
                return [direction]

        # Before settling for standing still, check whether the bomb lies on
        # our own cell; if so, offer the unsafe directions that are not
        # blocked (the caller picks one of them).
        fallback = []
        if len(item[constants.Item(3)]) == 1:
            if any(my_position == bomb['position'] for bomb in bombs):
                fallback = [k for k in unsafe_directions if k not in blocked]
        return fallback if fallback else [constants.Action.Stop]

    x, y = my_position
    safe = []
    disallowed = []  # Directions that go off the board or are blocked.
    for row, col in neighbours:
        position = (x + row, y + col)
        direction = utility.get_direction(my_position, position)

        # Don't include any direction that will go off of the board.
        if not utility.position_on_board(board, position):
            disallowed.append(direction)
            continue

        # Don't include any direction that we know is unsafe.
        if direction in unsafe_directions:
            # An unsafe cell that cannot be entered is disallowed outright,
            # so that we do not end up kicking a bomb.
            if not utility.position_is_passable(board, position, enemies):
                disallowed.append(direction)
            # TODO: also refuse an unsafe direction when stepping there would
            # get us killed right away (needs the bomb life to decide).
            continue

        if not (utility.position_is_passable(board, position, enemies) or
                utility.position_is_fog(board, position)):
            continue

        # One step away there may still be a bomb about to go off next to us.
        about_to_be_hit = False
        for bomb in bombs:
            if bomb['bomb_life'] != 1:
                continue
            bomb_x, bomb_y = bomb['position']
            if (bomb_x == position[0] and
                    abs(bomb_y - position[1]) <= bomb['blast_strength']) or \
               (bomb_y == position[1] and
                    abs(bomb_x - position[0]) <= bomb['blast_strength']):
                about_to_be_hit = True
                break
        if not about_to_be_hit:
            safe.append(direction)

    if safe:
        return safe

    # We don't have any safe directions, so return something that is allowed.
    allowed = [k for k in unsafe_directions if k not in disallowed]
    if allowed:
        return allowed
    # We don't have ANY directions. So return the stop choice.
    return [constants.Action.Stop]
def get_all_possible_states(self):
    """Build one successor ``State`` per candidate move from this state.

    Candidate moves are Stop, Up, Down, Left, Right and ``5`` (used here as
    the bomb action).  A move is dropped when its direction is invalid, when
    its target cell is blocked (subject to the kick rules below), when
    bombing with no ammo, or when bombing while ``self._my_position`` is
    already in ``self.bombing_agents``.  Each remaining move is simulated with
    ``self.advance_game_on_copy`` and wrapped in a scored ``State``.

    Returns:
        list of ``State`` objects, each carrying ``move``, ``bombing_agents``
        and ``score``.
    """
    list_of_states = []
    moves = [
        constants.Action.Stop, constants.Action.Up, constants.Action.Down,
        constants.Action.Left, constants.Action.Right, 5
    ]
    board_shape1, board_shape2 = self._board.shape

    # NOTE(review): ``moves`` was just built with six entries, so this branch
    # cannot run and ``lost_all_moves`` stays False.  It looks like a leftover
    # from a disabled step that pruned unsafe moves -- confirm before removal.
    lost_all_moves = False
    if len(moves) == 0:
        lost_all_moves = True
        moves = [
            constants.Action.Up, constants.Action.Down,
            constants.Action.Left, constants.Action.Right,
            constants.Action.Stop, 5
        ]

    for move in moves:
        if move == 5 or utility.is_valid_direction(
                self._board, self._my_position, move):
            # Check whether the target cell is passable.  Stop and bomb have
            # no target cell, so check_pos stays None for them.
            check_pos = None
            if move == constants.Action.Up:
                check_pos = (self._my_position[0] - 1, self._my_position[1])
            elif move == constants.Action.Down:
                check_pos = (self._my_position[0] + 1, self._my_position[1])
            elif move == constants.Action.Left:
                check_pos = (self._my_position[0], self._my_position[1] - 1)
            elif move == constants.Action.Right:
                check_pos = (self._my_position[0], self._my_position[1] + 1)

            if check_pos != None:
                if not utility.position_is_passable(
                        self._board, check_pos, self._enemies):
                    # If I am blocked by a bomb, try kicking.  Board value 3
                    # is treated as a bomb and 0 as a free cell.
                    # NOTE(review): in each direction below the ``else`` is
                    # paired with the bomb check (blocker is not a bomb ->
                    # drop the move).  A kick whose landing cell is off the
                    # board is therefore not rejected here -- confirm this
                    # pairing against the intended kick rules.
                    if self._obs['can_kick']:
                        if move == constants.Action.Up:
                            if self._board[self._my_position[0] - 1][self._my_position[1]] == 3:
                                if self._my_position[0] - 2 >= 0:
                                    if self._board[
                                            self._my_position[0] - 2][self._my_position[1]] != 0:
                                        # cell behind the bomb is occupied
                                        continue
                            else:
                                # blocker is not a bomb
                                continue
                        elif move == constants.Action.Down:
                            if self._board[self._my_position[0] + 1][self._my_position[1]] == 3:
                                if self._my_position[0] + 2 < board_shape1:
                                    if self._board[
                                            self._my_position[0] + 2][self._my_position[1]] != 0:
                                        # cell behind the bomb is occupied
                                        continue
                            else:
                                # blocker is not a bomb
                                continue
                        elif move == constants.Action.Left:
                            if self._board[self._my_position[0]][
                                    self._my_position[1] - 1] == 3:
                                if self._my_position[1] - 2 >= 0:
                                    if self._board[self._my_position[0]][
                                            self._my_position[1] - 2] != 0:
                                        # cell behind the bomb is occupied
                                        continue
                            else:
                                # blocker is not a bomb
                                continue
                        elif move == constants.Action.Right:
                            if self._board[self._my_position[0]][
                                    self._my_position[1] + 1] == 3:
                                if self._my_position[1] + 2 < board_shape2:
                                    if self._board[self._my_position[0]][
                                            self._my_position[1] + 2] != 0:
                                        # cell behind the bomb is occupied
                                        continue
                            else:
                                # blocker is not a bomb
                                continue
                        else:
                            # not a movement action
                            continue
                    else:
                        # cannot kick, so a blocked cell ends this move
                        continue

            # Can not bomb with no ammo.
            if move == 5 and self._ammo == 0:
                continue
            # If I am on a bomb, lets not bomb.
            if move == 5 and self._my_position in self.bombing_agents:
                continue

            # Simulate the move on a copy of the game.
            temp_board, temp_curr_agent, temp_curr_bombs, temp_curr_items, temp_curr_flames, bombing_agents = self.advance_game_on_copy(
                move)
            # ``self.self_agent_value - 10`` is used as this agent's index
            # into the observation list.
            temp_obs = self.fm.get_observations(
                temp_board, temp_curr_agent, temp_curr_bombs,
                temp_curr_flames, False, 11, self._game_mode,
                '')[self.self_agent_value - 10]
            temp_obs['ammo'] = self._ammo
            if move == 5:
                # Remember who laid the bomb on this cell; one bomb is spent.
                bombing_agents[(
                    self._my_position[0],
                    self._my_position[1])] = self.self_agent_value - 10
                temp_obs['ammo'] = self._ammo - 1
            temp_obs['enemies'] = self._enemies

            temp_state = State(temp_obs, True)
            temp_state.bombing_agents = bombing_agents
            temp_state.move = move
            temp_state.score = temp_state.get_score()
            temp_state.score -= 0.1
            # If the score is negative, we don't want this state.
            # If the agent is dead, make the score strongly negative.
            if not temp_state.am_I_alive:
                temp_state.score -= 100
            if lost_all_moves == True:
                temp_state.score -= 200
            list_of_states.append(temp_state)

    return list_of_states
def _djikstra(board,
              my_position,
              bombs,
              enemies,
              bomb_timer=None,
              depth=None,
              exclude=None):
    """Shortest step counts from ``my_position``, optionally avoiding blasts.

    Parameters
    ----------
    board : grid indexed by ``(row, col)`` tuples; ``len(board)`` is used as
        the extent of both axes (the board is treated as square).
    my_position : ``(row, col)`` tuple the search starts from.
    bombs : iterable of dicts carrying a ``'position'`` entry.
    enemies : forwarded unchanged to ``utility.position_is_passable``.
    bomb_timer : optional grid indexed by position.  A step onto a cell is
        refused when its timer ``t`` is positive and the arrival step count
        differs from ``t`` by less than 2.
    depth : largest Manhattan distance from ``my_position`` that is
        considered; defaults to ``len(board) * 2`` (the whole board).
    exclude : items whose cells are ignored; defaults to Fog, Rigid, Flames.

    Returns
    -------
    items : defaultdict(list), ``constants.Item`` -> positions (row-major).
    dist : dict, position -> number of steps.  Cells that are impassable or
        never reached keep ``np.inf``.
    prev : dict, position -> predecessor on the shortest path, or ``None``.
    """
    if depth is None:
        depth = len(board) * 2

    if exclude is None:
        exclude = [
            constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
        ]

    def out_of_range(p1, p2):
        x1, y1 = p1
        x2, y2 = p2
        return abs(y2 - y1) + abs(x2 - x1) > depth

    items = defaultdict(list)

    for bomb in bombs:
        if bomb['position'] == my_position:
            items[constants.Item.Bomb].append(my_position)

    dist = {}
    prev = {}

    size = len(board)
    mx, my = my_position
    # ``+ 1`` because range() excludes its end: cells exactly ``depth`` rows
    # or columns away are still in range according to out_of_range().
    for r in range(max(0, mx - depth), min(size, mx + depth + 1)):
        for c in range(max(0, my - depth), min(size, my + depth + 1)):
            position = (r, c)
            if out_of_range(my_position, position) or \
                    utility.position_in_items(board, position, exclude):
                continue

            dist[position] = 0 if position == my_position else np.inf
            prev[position] = None

            item = constants.Item(board[position])
            items[item].append(position)

    # Djikstra
    H = []
    heapq.heappush(H, (0, my_position))
    while H:
        min_dist, position = heapq.heappop(H)

        # A bomb cell may be expanded even though it is not passable.
        if (board[position] != constants.Item.Bomb.value
           ) and not utility.position_is_passable(board, position, enemies):
            continue

        x, y = position
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            new_position = (row + x, col + y)
            if new_position not in dist:
                continue

            if not utility.position_is_passable(board, new_position,
                                                enemies):
                continue

            if bomb_timer is not None:
                t = bomb_timer[new_position]
                # Arriving within one step of the cell's timer: skip it.
                if t > 0 and abs((min_dist + 1) - t) < 2:
                    continue

            if min_dist + 1 < dist[new_position]:
                dist[new_position] = min_dist + 1
                prev[new_position] = position
                heapq.heappush(H, (dist[new_position], new_position))

    return items, dist, prev
def act(self, obs, action_space):
    """Choose the action for this step from the observation ``obs``.

    The method scores every cell of a hard-coded 11x11 grid (breakable wood
    in reach, closeness, power-ups, bomb blasts, enemy lines) and picks the
    reachable cell with the highest score.  If that cell is the current one,
    it bombs when ``self._can_escape`` allows it and stops otherwise; if it
    is another cell, it takes the step ``self._backtrack`` gives towards it.

    Side effects: updates ``self.bomb_time``, ``self.prev_action`` and
    ``self.prev_pos``; prints diagnostics when ``self.logging`` is set.
    ``action_space`` is not used.

    Returns:
        the ``.value`` of the chosen ``constants.Action``.
    """

    def convert_bombs(bomb_map):
        # One dict per cell of bomb_map holding a positive blast strength.
        ret = []
        locations = np.where(bomb_map > 0)
        for r, c in zip(locations[0], locations[1]):
            ret.append({
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            })
        return ret

    depth = 20

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    # The previous action had a value in 1..4 but the position did not
    # change: mark the current cell as rigid on the local board copy.
    if self.prev_pos != None:
        if self.prev_pos == my_position:
            if 1 <= self.prev_action.value <= 4:
                if self.logging:
                    print('freeze')
                board[self.prev_pos] = constants.Item.Rigid.value

    items, dist, prev = self._djikstra(board,
                                       my_position,
                                       bombs,
                                       enemies,
                                       bomb_timer=self.bomb_time,
                                       depth=depth)

    if self.logging:
        print('my_position =', my_position)
        print('board =')
        print(board)
        print('dist =')
        print(dist)
        print('bombs =', bombs)
        print('enemies =', enemies)
        for e in enemies:
            print(e)
            pos = items.get(e, [])
            print('pos =', pos)
            print('pos_len=', len(pos))
            if len(pos) > 0:
                print('xy=', pos[0][0], ',', pos[0][1])
        print('ammo =', ammo)
        print('blast_strength =', blast_strength)

    # Distance matrix (-1 for cells missing from dist); only used for the
    # diagnostic print below.
    test_ary = np.ones((11, 11))
    for c in range(11):
        for r in range(11):
            if (r, c) in dist:
                test_ary[r, c] = dist[(r, c)]
            else:
                test_ary[r, c] = -1
    if self.logging:
        print("dist_mat:")
        print(test_ary)

    # update bomb_time map
    bomb_life = 8
    has_bomb = {}
    already_breakable = np.zeros((11, 11))
    for b in bombs:
        r, c = b['position']
        strength = b['blast_strength']
        # A zero timer is started at bomb_life, any other value counts down.
        if self.bomb_time[(r, c)] == 0:
            self.bomb_time[(r, c)] = bomb_life
        else:
            self.bomb_time[(r, c)] -= 1
        # NOTE(review): the bomb's own cell (r, c) is never added to
        # has_bomb, so the clean-up loop below resets its timer to 0 unless
        # another bomb's blast covers it -- confirm this is intended.
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_rigid(board, new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    already_breakable[new_pos] = 1
                if self.bomb_time[new_pos] == 0:
                    self.bomb_time[new_pos] = bomb_life
                else:
                    self.bomb_time[new_pos] -= 1
                has_bomb[new_pos] = 1

    # clear up table
    for c in range(11):
        for r in range(11):
            if (r, c) not in has_bomb:
                self.bomb_time[(r, c)] = 0
    if self.logging:
        print("bomb_time:")
        print(self.bomb_time)

    # evaluate each position in terms of breakable woods
    num_breakable = np.zeros((11, 11))
    num_breakable_inside = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if utility.position_is_wood(board, (r, c)):
                # Wood already marked as covered by an existing bomb is
                # skipped.
                if already_breakable[(r, c)]:
                    continue
                # Credit every passable/flame cell in a straight line from
                # this wood, stopping at the first other cell.
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    for d in range(1, blast_strength):
                        new_pos = (r + d * row, c + d * col)
                        if TestSimpleAgent._out_of_board(new_pos):
                            continue
                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            num_breakable[new_pos] += 1
                        else:
                            break
                # Count the wood neighbours of this wood cell; one is
                # subtracted when no neighbour is passable.
                tmp_num = 0
                has_passable = False
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue
                    if utility.position_is_wood(board, new_pos):
                        tmp_num += 1
                    elif utility.position_is_passable(
                            board, new_pos, enemies):
                        has_passable = True
                if (not has_passable) and tmp_num > 0:
                    tmp_num -= 1
                num_breakable_inside[(r, c)] = tmp_num
    if self.logging:
        print('num_breakable:')
        print(num_breakable)
        print('num_breakable_inside:')
        print(num_breakable_inside)

    # Total = own breakable count + half of each neighbour's inside count.
    num_breakable_total = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            num_breakable_total[(r, c)] = num_breakable[(r, c)]
            if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                    r, c)] == np.inf:
                continue
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_pos = (r + row, c + col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                num_breakable_total[(
                    r, c)] += num_breakable_inside[new_pos] * 0.5
    if self.logging:
        print('num_breakable_total:')
        print(num_breakable_total)

    # evaluate each position in total
    pos_scores = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                pos_scores[(r, c)] = -1
                continue
            elif dist[(r, c)] == np.inf:
                pos_scores[(r, c)] = np.inf
                continue
            if num_breakable_total[(r, c)] > 0:
                pos_scores[(r, c)] += num_breakable_total[(r, c)]
                pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2
            # consider power-up items
            if board[(r, c)] in {
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value
            }:
                pos_scores[(r, c)] += 50
    if self.logging:
        print('pos_score:')
        print(pos_scores)

    # consider degree of freedom
    dis_to_ene = 100
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                      my_position[1])
            if dis_to_ene > d:
                dis_to_ene = d
    # NOTE(review): dis_to_ene is 100 or a sum of absolute values, so it is
    # never <= -4 and this block cannot run.  It looks deliberately disabled
    # -- confirm.
    if dis_to_ene <= -4:
        deg_frees = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if not utility.position_is_passable(
                        board, (r, c), enemies):
                    continue
                deg_free = 0
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        deg_free += 1
                deg_frees[(r, c)] = deg_free
                if deg_free <= 1:
                    pos_scores[(r, c)] -= 5
        if self.logging:
            print('deg_free')
            print(deg_frees)

    # consider bomb blast
    for i in range(len(bombs)):
        r, c = bombs[i]['position']
        strength = bombs[i]['blast_strength']
        pos_scores[(r, c)] = -20
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                if new_pos not in dist:
                    continue
                # NOTE(review): this compares the position tuple itself with
                # np.inf, which is never equal; ``dist[new_pos] == np.inf``
                # may have been intended -- confirm.
                elif new_pos == np.inf:
                    continue
                pos_scores[new_pos] = -20
    if self.logging:
        print('consider blast pos_score:')
        print(pos_scores)

    # consider enemies
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            r = pos[0][0]
            c = pos[0][1]
            # Small bonus for passable cells lined up with the enemy.
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, blast_strength * 2):
                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if not utility.position_is_passable(
                            board, new_pos, enemies):
                        break
                    pos_scores[new_pos] += 0.3
    if self.logging:
        print('consider enemy:')
        print(pos_scores)

    # Find the reachable cell with the highest score (first one wins ties).
    h_r, h_c = -1, -1
    h_score = -1
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                continue
            elif dist[(r, c)] == np.inf:
                continue
            if h_score < pos_scores[(r, c)]:
                h_score = pos_scores[(r, c)]
                h_r, h_c = (r, c)
    if self.logging:
        print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
        print('prev:')
        print(prev)

    # if current position is not the highest score position, move to the
    # highest position.
    if h_r == -1:
        # No reachable cell scored above -1.
        self.prev_action = constants.Action.Stop
    elif pos_scores[my_position] == h_score:
        if self._can_escape(pos_scores, my_position, blast_strength):
            self.prev_action = constants.Action.Bomb
        else:
            self.prev_action = constants.Action.Stop
    else:
        self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

    self.prev_pos = my_position
    if self.logging:
        print('action: ', self.prev_action)
    return self.prev_action.value

    # NOTE(review): everything below follows the unconditional return above
    # and therefore appears to be unreachable.  It is kept unchanged --
    # confirm before deleting.

    # Move if we are in an unsafe place.
    unsafe_directions = self._directions_in_range_of_bomb(
        board, my_position, bombs, dist)
    if unsafe_directions:
        directions = self._find_safe_directions(board, my_position,
                                                unsafe_directions, bombs,
                                                enemies)
        return random.choice(directions).value

    # Lay pomme if we are adjacent to an enemy.
    if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
            ammo, blast_strength, items, dist, my_position):
        return constants.Action.Bomb.value

    # Move towards an enemy if there is one in exactly three reachable spaces.
    direction = self._near_enemy(my_position, items, dist, prev, enemies, 3)
    if direction is not None and (self._prev_direction != direction or
                                  random.random() < .5):
        self._prev_direction = direction
        return direction.value

    # Move towards a good item if there is one within two reachable spaces.
    direction = self._near_good_powerup(my_position, items, dist, prev, 2)
    if direction is not None:
        return direction.value

    # Maybe lay a bomb if we are within a space of a wooden wall.
    if self._near_wood(my_position, items, dist, prev, 1):
        if self._maybe_bomb(ammo, blast_strength, items, dist, my_position):
            return constants.Action.Bomb.value
        else:
            return constants.Action.Stop.value

    # Move towards a wooden wall if there is one within two reachable spaces
    # and you have a bomb.
    direction = self._near_wood(my_position, items, dist, prev, 2)
    if direction is not None:
        directions = self._filter_unsafe_directions(
            board, my_position, [direction], bombs)
        if directions:
            return directions[0].value

    # Choose a random but valid direction.
    directions = [
        constants.Action.Stop, constants.Action.Left,
        constants.Action.Right, constants.Action.Up, constants.Action.Down
    ]
    valid_directions = self._filter_invalid_directions(
        board, my_position, directions, enemies)
    directions = self._filter_unsafe_directions(board, my_position,
                                                valid_directions, bombs)
    directions = self._filter_recently_visited(
        directions, my_position, self._recently_visited_positions)
    if len(directions) > 1:
        directions = [k for k in directions if k != constants.Action.Stop]
    if not len(directions):
        directions = [constants.Action.Stop]

    # Add this position to the recently visited uninteresting positions so we
    # don't return immediately.
    self._recently_visited_positions.append(my_position)
    self._recently_visited_positions = self._recently_visited_positions[
        -self._recently_visited_length:]

    return random.choice(directions).value
def not_stuck_directions(obs):
    """Flag the actions in ``dirs`` that do not lead into a dead end.

    The agent's own cell is treated as a bomb on a copy of the board.  An
    entry of the returned six-slot list (indexed by ``action.value``) is 1
    when the neighbouring cell is enterable and ``is_stuck_direction`` finds
    an escape from it, using that cell's ``obs['bomb_blast_strength']`` value
    as the range.  When no slot was set, the result is 1 for slot 0 and -1
    everywhere else.
    """
    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    enemies = [consts.Item(e) for e in obs['enemies']]
    neighbours = ((-1, 0), (1, 0), (0, -1), (0, 1))

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        origin_x, origin_y = next_position
        pending = queue.PriorityQueue()
        pending.put((0, next_position))
        done = set()
        while not pending.empty():
            reached_at, cell = pending.get()
            done.add(cell)
            cell_x, cell_y = cell
            if (origin_x != cell_x and
                    origin_y != cell_y) or reached_at > bomb_range:
                return False
            for step_x, step_y in neighbours:
                candidate = (step_x + cell_x, step_y + cell_y)
                if candidate in done:
                    continue
                if not util.position_on_board(next_board, candidate):
                    continue
                if not util.position_is_passable(next_board, candidate,
                                                 enemies):
                    continue
                gap = abs(candidate[0] - origin_x) + abs(
                    candidate[1] - origin_y)
                pending.put((gap, candidate))
        return True

    flags = [0] * 6
    next_board = board.copy()
    next_board[my_position] = consts.Item.Bomb.value

    for direction in dirs:
        next_position = util.get_next_position(my_position, direction)
        nx, ny = next_position
        enterable = (util.position_on_board(next_board, next_position) and
                     util.position_is_passable(next_board, next_position,
                                               enemies))
        if not enterable:
            continue
        reach = obs['bomb_blast_strength'][nx, ny]
        if not is_stuck_direction(next_position, reach, next_board, enemies):
            flags[direction.value] = 1

    if not any(flags):
        # Nothing works: only slot 0 stays acceptable.
        return [1] + [-1] * 5
    return flags