def _what_to_break(cls, board, my_position, blast_strength):
    """Return the items a blast starting at ``my_position`` would hit.

    The blast is traced outward from ``my_position`` along the four axis
    directions, over distances ``1 .. blast_strength - 1``. Tracing a
    direction stops at the board edge or at the first rigid cell. If a
    wooden wall or an agent is met first, its ``constants.Item`` is
    recorded and tracing of that direction stops as well.

    Args:
        board: 2-D board indexed as ``board[(x, y)]``.
        my_position: ``(x, y)`` cell the blast starts from.
        blast_strength: blast range; ``blast_strength - 1`` cells are
            scanned in each direction.

    Returns:
        A list of ``constants.Item`` with at most one entry per direction,
        in the order +x, -x, +y, -y.
    """
    x, y = my_position
    # The first index of ``board`` is bounded by len(board) and the second
    # by len(board[0]); checking each axis against its own extent keeps the
    # scan valid on non-square boards.
    limit_x, limit_y = len(board), len(board[0])

    to_break = []
    for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
        for distance in range(1, blast_strength):
            position = (x + distance * step_x, y + distance * step_y)
            if not (0 <= position[0] < limit_x
                    and 0 <= position[1] < limit_y):
                break
            if utility.position_is_rigid(board, position):
                # A rigid cell stops the search in this direction.
                break
            if (utility.position_is_wood(board, position)
                    or utility.position_is_agent(board, position)):
                to_break.append(constants.Item(board[position]))
                break
    return to_break
def get_observation_state(self, board, pos, enemies, bomb_map, bomb_life,
                          ammo, can_kick):
    """Summarise the agent's immediate surroundings as a ``State``.

    Args:
        board: np.array board layout (``board.shape`` is read).
        pos: ``(x, y)`` tuple, the agent's position.
        enemies: list of enemies, passed to ``utility.position_is_enemy``.
        bomb_map: bomb map, converted with ``np.array`` and handed to
            ``self.convert_bombs``.
        bomb_life: bomb life map, handled like ``bomb_map``.
        ammo: number of bombs available; only ``ammo > 0`` is used.
        can_kick: forwarded unchanged into the returned ``State``.

    Returns:
        ``State(has_bomb, has_enemy, is_surrounded, los_bomb, has_ammo,
        can_kick)`` where

        * ``has_bomb``: a bomb is on an adjacent cell or on the agent's own
          cell, or ``self.check_bomb`` is true for the agent's own cell;
        * ``has_enemy``: an enemy occupies an adjacent cell;
        * ``is_surrounded``: more than two adjacent on-board cells are rigid;
        * ``los_bomb``: ``self.check_bomb`` is true for some adjacent cell;
        * ``has_ammo``: ``ammo > 0``.
    """
    bombs = self.convert_bombs(np.array(bomb_map), np.array(bomb_life))

    has_bomb = False
    has_enemy = False
    los_bomb = False
    has_ammo = bool(ammo > 0)
    rigid_neighbours = 0

    x, y = pos
    for step_x, step_y in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        neighbour = (x + step_x, y + step_y)
        if not (0 <= neighbour[0] < board.shape[0]
                and 0 <= neighbour[1] < board.shape[1]):
            continue
        if utility.position_is_bomb(bombs, neighbour):
            has_bomb = True
        if utility.position_is_rigid(board, neighbour):
            rigid_neighbours += 1
        # Test the neighbouring cell: testing ``pos`` (the agent's own
        # cell) here could never detect an adjacent enemy.
        if utility.position_is_enemy(board, neighbour, enemies):
            has_enemy = True
        # check_bomb is evaluated for every on-board neighbour.
        # NOTE(review): nesting was reconstructed from a collapsed source;
        # confirm this call belongs inside the bounds check.
        los_bomb = self.check_bomb(neighbour, bombs) or los_bomb

    if utility.position_is_bomb(bombs, (x, y)) or self.check_bomb(
            (x, y), bombs):
        has_bomb = True

    is_surrounded = rigid_neighbours > 2

    return State(has_bomb, has_enemy, is_surrounded, los_bomb, has_ammo,
                 can_kick)
def _make_safety_score(cls, board, items, bombs, enemies):
    """Build a per-cell safety score with the same shape as ``board``.

    Scoring steps, in order:

    1. Every cell starts at 1.
    2. Each bomb's own cell and every cell within ``blast_strength - 1``
       steps along the four axis directions is set to ``-inf``; a direction
       stops at the board edge or at the first rigid cell (the rigid cell
       itself is not marked by this step).
    3. Every cell for which ``utility.position_is_wall`` is true is set to
       ``-inf``.
    4. Each cell gains +1 for every axis neighbour that is still exactly 1
       after steps 2-3 (``-inf`` cells stay ``-inf``).
    5. For every known enemy position, that cell and its on-board axis
       neighbours lose 1.

    Args:
        board: 2-D np.array board, indexed as ``board[(x, y)]``.
        items: mapping from item to a list of ``(x, y)`` positions; only the
            entries keyed by members of ``enemies`` are read.
        bombs: iterable of dicts with ``"position"`` and
            ``"blast_strength"`` keys.
        enemies: iterable of keys to look up in ``items``.

    Returns:
        np.array of floats with the shape of ``board``.
    """
    # Use each axis' own extent so non-square boards are handled correctly.
    limit_x, limit_y = board.shape
    safety_score = np.ones(board.shape)

    # Bomb blast zones.
    for bomb in bombs:
        x, y = bomb["position"]
        bomb_range = bomb["blast_strength"]
        safety_score[(x, y)] = -np.inf
        for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            for distance in range(1, bomb_range):
                position = (x + distance * step_x, y + distance * step_y)
                if not (0 <= position[0] < limit_x
                        and 0 <= position[1] < limit_y):
                    break
                if utility.position_is_rigid(board, position):
                    break
                safety_score[position] = -np.inf

    # Walls.
    for x in range(limit_x):
        for y in range(limit_y):
            position = (x, y)
            if utility.position_is_wall(board, position):
                safety_score[position] = -np.inf

    # Reward cells for each safe axis neighbour. The mask is taken once,
    # before any increment, so the increments do not influence one another.
    is_safe = (safety_score == 1)
    safety_score[1:, :] += is_safe[:-1, :]
    safety_score[:-1, :] += is_safe[1:, :]
    safety_score[:, 1:] += is_safe[:, :-1]
    safety_score[:, :-1] += is_safe[:, 1:]

    # Penalise enemy cells and their axis neighbours.
    for enemy in enemies:
        for position in items.get(enemy, []):
            x, y = position
            safety_score[position] -= 1
            if x > 0:
                safety_score[(x - 1, y)] -= 1
            if y > 0:
                safety_score[(x, y - 1)] -= 1
            if x < limit_x - 1:
                safety_score[(x + 1, y)] -= 1
            if y < limit_y - 1:
                safety_score[(x, y + 1)] -= 1

    return safety_score
def act(self, obs, action_space):
    """Pick the next action by scoring every cell of an 11x11 board.

    Outline:

    1. Read position, board, bombs, enemies, ammo and blast strength from
       ``obs``. If the previous action had a value in 1..4 and the agent
       did not change cell, its cell is marked rigid on the local board
       copy before path search.
    2. Run ``self._djikstra`` to obtain ``items``, ``dist`` and ``prev``.
    3. Update ``self.bomb_time`` for bomb cells and their blast cells, and
       reset it for every cell not covered by a blast.
    4. Score cells: breakable-wood counts, a distance bonus, +50 for
       ExtraBomb / IncrRange cells, -20 inside bomb blasts, +0.3 on
       passable lines extending from enemies.
    5. Among cells present in ``dist`` with finite distance, take the
       highest score (first one found wins ties). If the agent already
       stands on a best-scoring cell it lays a bomb when
       ``self._can_escape`` allows it, otherwise it stops; else it
       backtracks toward the best cell via ``self._backtrack``.

    Side effects: updates ``self.bomb_time``, ``self.prev_action`` and
    ``self.prev_pos``; prints diagnostics when ``self.logging`` is truthy.

    Args:
        obs: observation dict; keys read are ``'position'``, ``'board'``,
            ``'bomb_blast_strength'``, ``'enemies'``, ``'ammo'`` and
            ``'blast_strength'``.
        action_space: unused.

    Returns:
        ``self.prev_action.value`` for the chosen action.

    NOTE(review): this block was reconstructed from a source whose
    indentation had been lost; nesting marked with NOTE(review) below
    should be confirmed against the original file. A trailing rule-based
    fallback that followed the unconditional ``return`` was unreachable
    and has been removed.
    """

    def convert_bombs(bomb_map):
        """List each cell with positive blast strength as a bomb dict."""
        locations = np.where(bomb_map > 0)
        return [{
            'position': (r, c),
            'blast_strength': int(bomb_map[(r, c)])
        } for r, c in zip(locations[0], locations[1])]

    def off_board(cell):
        """True when ``cell`` lies outside the 0..10 index range."""
        return (cell[0] < 0 or cell[0] > 10 or cell[1] < 0 or cell[1] > 10)

    size = 11  # board side length assumed throughout this method
    steps = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    depth = 20

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    # The previous action (value 1..4, presumably a move) left us on the
    # same cell: mark that cell rigid on our private board copy.
    if (self.prev_pos is not None and self.prev_pos == my_position
            and 1 <= self.prev_action.value <= 4):
        if self.logging:
            print('freeze')
        board[self.prev_pos] = constants.Item.Rigid.value

    items, dist, prev = self._djikstra(board,
                                       my_position,
                                       bombs,
                                       enemies,
                                       bomb_timer=self.bomb_time,
                                       depth=depth)

    if self.logging:
        print('my_position =', my_position)
        print('board =')
        print(board)
        print('dist =')
        print(dist)
        print('bombs =', bombs)
        print('enemies =', enemies)
        for e in enemies:
            print(e)
            pos = items.get(e, [])
            print('pos =', pos)
            print('pos_len=', len(pos))
            if len(pos) > 0:
                print('xy=', pos[0][0], ',', pos[0][1])
        print('ammo =', ammo)
        print('blast_strength =', blast_strength)

        # Dense view of ``dist`` (-1 where a cell is absent); it is only
        # ever printed, so it is built only when logging.
        test_ary = np.ones((size, size))
        for c in range(size):
            for r in range(size):
                test_ary[r, c] = dist[(r, c)] if (r, c) in dist else -1
        print("dist_mat:")
        print(test_ary)

    # --- update bomb_time map -------------------------------------------
    bomb_life = 8
    has_bomb = {}
    already_breakable = np.zeros((size, size))
    for b in bombs:
        r, c = b['position']
        strength = b['blast_strength']
        if self.bomb_time[(r, c)] == 0:
            self.bomb_time[(r, c)] = bomb_life
        else:
            self.bomb_time[(r, c)] -= 1
        for row, col in steps:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_rigid(board, new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    already_breakable[new_pos] = 1
                # NOTE(review): assumed to apply to every blast cell, not
                # only to wood cells - confirm nesting.
                if self.bomb_time[new_pos] == 0:
                    self.bomb_time[new_pos] = bomb_life
                else:
                    self.bomb_time[new_pos] -= 1
                has_bomb[new_pos] = 1

    # Clear the timer of every cell not covered by a blast this step.
    for c in range(size):
        for r in range(size):
            if (r, c) not in has_bomb:
                self.bomb_time[(r, c)] = 0

    if self.logging:
        print("bomb_time:")
        print(self.bomb_time)

    # --- evaluate each position in terms of breakable woods --------------
    num_breakable = np.zeros((size, size))
    num_breakable_inside = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            if not utility.position_is_wood(board, (r, c)):
                continue
            if already_breakable[(r, c)]:
                continue
            # Credit every passable/flame cell from which this wood lies
            # within blast range along a clear line.
            for row, col in steps:
                for d in range(1, blast_strength):
                    new_pos = (r + d * row, c + d * col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        num_breakable[new_pos] += 1
                    else:
                        break
            # Count adjacent wood cells; when no neighbour is passable,
            # one of them is discounted.
            # NOTE(review): assumed to run for wood cells only - confirm
            # nesting.
            tmp_num = 0
            has_passable = False
            for row, col in steps:
                new_pos = (r + row, c + col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    tmp_num += 1
                elif utility.position_is_passable(board, new_pos, enemies):
                    has_passable = True
            if (not has_passable) and tmp_num > 0:
                tmp_num -= 1
            num_breakable_inside[(r, c)] = tmp_num

    if self.logging:
        print('num_breakable:')
        print(num_breakable)
        print('num_breakable_inside:')
        print(num_breakable_inside)

    num_breakable_total = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            num_breakable_total[(r, c)] = num_breakable[(r, c)]
            if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                    r, c)] == np.inf:
                continue
            for row, col in steps:
                new_pos = (r + row, c + col)
                if off_board(new_pos):
                    continue
                num_breakable_total[(
                    r, c)] += num_breakable_inside[new_pos] * 0.5

    if self.logging:
        print('num_breakable_total:')
        print(num_breakable_total)

    # --- evaluate each position in total ---------------------------------
    pos_scores = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            if (r, c) not in dist:
                pos_scores[(r, c)] = -1
                continue
            elif dist[(r, c)] == np.inf:
                pos_scores[(r, c)] = np.inf
                continue
            if num_breakable_total[(r, c)] > 0:
                pos_scores[(r, c)] += num_breakable_total[(r, c)]
                # NOTE(review): distance bonus assumed to apply only when
                # something is breakable from here - confirm nesting.
                pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2
            # consider power-up items
            if board[(r, c)] in {
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value
            }:
                pos_scores[(r, c)] += 50

    if self.logging:
        print('pos_score:')
        print(pos_scores)

    # --- consider degree of freedom --------------------------------------
    dis_to_ene = 100
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                      my_position[1])
            if dis_to_ene > d:
                dis_to_ene = d
    # NOTE(review): dis_to_ene is a Manhattan distance (>= 0) or 100, so
    # this threshold is never met and the branch is effectively disabled;
    # confirm whether that is intentional.
    if dis_to_ene <= -4:
        deg_frees = np.zeros((size, size))
        for c in range(size):
            for r in range(size):
                if not utility.position_is_passable(board, (r, c), enemies):
                    continue
                deg_free = 0
                for row, col in steps:
                    new_pos = (r + row, c + col)
                    if off_board(new_pos):
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        deg_free += 1
                deg_frees[(r, c)] = deg_free
                if deg_free <= 1:
                    pos_scores[(r, c)] -= 5
        if self.logging:
            print('deg_free')
            print(deg_frees)

    # --- consider bomb blast ---------------------------------------------
    for bomb in bombs:
        r, c = bomb['position']
        strength = bomb['blast_strength']
        pos_scores[(r, c)] = -20
        for row, col in steps:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if off_board(new_pos):
                    continue
                if new_pos not in dist:
                    continue
                # NOTE(review): a tuple never equals np.inf, so this test
                # is always false; ``dist[new_pos] == np.inf`` may have
                # been intended. Left unchanged to preserve behaviour.
                elif new_pos == np.inf:
                    continue
                pos_scores[new_pos] = -20

    if self.logging:
        print('consider blast pos_score:')
        print(pos_scores)

    # --- consider enemies ------------------------------------------------
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            r = pos[0][0]
            c = pos[0][1]
            for row, col in steps:
                for d in range(1, blast_strength * 2):
                    new_pos = (r + d * row, c + d * col)
                    if off_board(new_pos):
                        continue
                    if not utility.position_is_passable(
                            board, new_pos, enemies):
                        break
                    pos_scores[new_pos] += 0.3

    if self.logging:
        print('consider enemy:')
        print(pos_scores)

    # --- pick the best reachable cell ------------------------------------
    # Column-major scan; only a strictly higher score replaces the current
    # best, so the first best cell encountered wins ties.
    h_r, h_c = -1, -1
    h_score = -1
    for c in range(size):
        for r in range(size):
            if (r, c) not in dist:
                continue
            elif dist[(r, c)] == np.inf:
                continue
            if h_score < pos_scores[(r, c)]:
                h_score = pos_scores[(r, c)]
                h_r, h_c = (r, c)

    if self.logging:
        print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
        print('prev:')
        print(prev)

    # If the current position is not the highest-scoring one, move toward
    # the highest-scoring position.
    if h_r == -1:
        self.prev_action = constants.Action.Stop
    elif pos_scores[my_position] == h_score:
        if self._can_escape(pos_scores, my_position, blast_strength):
            self.prev_action = constants.Action.Bomb
        else:
            self.prev_action = constants.Action.Stop
    else:
        self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

    self.prev_pos = my_position
    if self.logging:
        print('action: ', self.prev_action)
    return self.prev_action.value
def act(self, obs, action_space, info):
    """Return the action value that keeps the most future positions open.

    Steps:

    1. Search the time-expanded network built from
       ``info["list_boards_no_move"]`` for survivable time-positions.
    2. Ask ``self._get_survivable_actions`` which actions are survivable.
    3. If the agent can kick, simulate each kick that does not send the
       bomb toward fog and add it as a survivable action when the
       resulting position is survivable.
    4. Score every candidate action by the sum of its per-step survivable
       counts and return one best-scoring action (ties broken by a random
       shuffle).
    5. If nothing is survivable, fall back in order to: kicking, stepping
       onto the teammate's cell, stepping onto an enemy's cell, stepping
       onto any on-board cell that is not rigid, wood or a bomb, and
       finally ``Stop``.

    Args:
        obs: observation dict; keys read here are ``'board'``,
            ``'position'``, ``'can_kick'``, ``'enemies'`` and
            ``'teammate'`` (``obs`` is also forwarded to helpers).
        action_space: unused.
        info: dict; keys read are ``"list_boards_no_move"``,
            ``"curr_bombs"``, ``"curr_flames"`` and ``"moving_direction"``.

    Returns:
        The ``.value`` of the chosen ``constants.Action``.

    Raises:
        ValueError: if a survivable action is neither Bomb, Stop nor one
            of the four moves.

    NOTE(review): ``verbose`` is not defined in this method; it is
    presumably a module-level flag - confirm.
    """
    board = obs['board']
    my_position = obs["position"]  # (x, y) of this agent
    my_kick = obs["can_kick"]  # whether this agent can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    # Survivable time-positions and the successor relation between them.
    survivable, _, succ, _ = self._search_time_expanded_network(
        info["list_boards_no_move"], my_position)

    is_survivable, survivable_with_bomb = self._get_survivable_actions(
        survivable,
        obs,
        info["curr_bombs"],
        info["curr_flames"],
        enemy_mobility=0)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    if my_kick:
        # Positions where moving there kicks a bomb.
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            my_action = self._get_direction(my_position, next_position)

            # Do not kick into fog: walk along the kick direction and skip
            # this kick if any cell on the way is fog.
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            list_boards_with_kick, next_position = self._board_sequence(
                obs["board"],
                info["curr_bombs"],
                info["curr_flames"],
                self._search_range,
                my_position,
                my_action=my_action,
                can_kick=True,
                enemy_mobility=0)
            survivable_with_kick, _, _, _ = \
                self._search_time_expanded_network(
                    list_boards_with_kick[1:], next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]

    # Offset applied to the current position by each non-bomb action.
    offsets = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
        constants.Action.Stop: (0, 0),
    }
    x, y = my_position
    for action in survivable_actions:
        # For each survivable action, count survivable positions per step.
        if action == constants.Action.Bomb:
            n_survivable[action] = [len(s) for s in survivable_with_bomb[1:]]
            continue
        if action not in offsets:
            raise ValueError()
        dx, dy = offsets[action]
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    most_survivable_action = None
    if survivable_actions:
        survivable_score = dict()
        for action in n_survivable:
            survivable_score[action] = sum(n_survivable[action])
            if verbose:
                print(action, survivable_score[action],
                      n_survivable[action])
        best_survivable_score = max(survivable_score.values())
        # Shuffle so that ties are broken at random.
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

    if most_survivable_action is not None:
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value

    # No survivable action: try progressively more desperate options.

    # Kick if possible.
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
    print("Kickable", my_kick, kickable)
    if kickable:
        next_position = kickable.pop()
        action = self._get_direction(my_position, next_position)
        print("Must kick to survive", action)
        return action.value

    moves = [
        constants.Action.Right, constants.Action.Left,
        constants.Action.Down, constants.Action.Up
    ]

    # Move towards the teammate if she is blocking.
    for action in moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility._position_is_item(board, next_position, my_teammate):
            print("Must move to teammate to survive", action)
            return action.value

    # Move towards an enemy.
    for action in moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_enemy(board, next_position, my_enemies):
            print("Must move to enemy to survive", action)
            return action.value

    # Move towards anywhere besides rigid, wood or a bomb.
    for action in moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_rigid(board, next_position):
            continue
        if utility.position_is_wood(board, next_position):
            continue
        if utility.position_is_bomb(info["curr_bombs"], next_position):
            continue
        print("Try moving to survive", action)
        return action.value

    action = constants.Action.Stop
    print("Must die", action)
    # Return the value, like every other exit of this method, rather than
    # the enum member itself.
    return action.value
def act(self, obs, action_space, info):
    """Return the action value with the longest guaranteed survival.

    For each first step that is survivable on the no-move board sequence,
    and for laying a bomb when that is possible, the method computes how
    many time steps (up to ``self._search_range``) the agent can remain in
    the set returned as the fourth result of
    ``self._get_survivable_with_enemy``. One of the actions with the
    largest such value is returned, chosen at random among ties.

    Args:
        obs: observation dict; keys read are ``'board'``, ``'position'``,
            ``'enemies'``, ``'ammo'``, ``'blast_strength'``,
            ``'bomb_blast_strength'`` and ``'bomb_life'``.
        action_space: unused.
        info: dict; keys read are ``"list_boards_no_move"``,
            ``"curr_bombs"`` and ``"curr_flames"``.

    Returns:
        The ``.value`` of the chosen ``constants.Action``;
        ``constants.Action.Stop.value`` when no action was scored.

    NOTE(review): this block was reconstructed from a source whose
    indentation had been lost. The code that followed the final
    ``if/else`` (both branches of which return) was unreachable and used
    names that are never assigned, so it has been removed; confirm that
    the ``if survivable_steps`` decision really sits at function level.
    """

    def remaining_life(survivable_with_enemy, prev_positions):
        """Map ``(t, x, y)`` to the latest step reachable from there."""
        life = defaultdict(int)
        # Walk backwards in time so each time-position inherits the best
        # value of any successor it leads to.
        for t in range(self._search_range, 0, -1):
            for position in survivable_with_enemy[t]:
                if not life[(t, ) + position]:
                    life[(t, ) + position] = t
                for prev_position in prev_positions[t][position]:
                    life[(t - 1, ) + prev_position] = max(
                        life[(t - 1, ) + prev_position],
                        life[(t, ) + position])
        return life

    board = obs['board']
    my_position = obs["position"]  # (x, y) of this agent
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_ammo = obs['ammo']  # number of bombs available
    my_blast_strength = obs['blast_strength']

    # First board cell found for each enemy that appears on the board.
    enemy_position = dict()
    for enemy in my_enemies:
        positions = np.argwhere(board == enemy.value)
        if len(positions) == 0:
            continue
        enemy_position[enemy] = tuple(positions[0])

    survivable_steps = defaultdict(int)

    #
    # survivable tree in standard case
    #
    list_boards_no_kick = deepcopy(info["list_boards_no_move"])

    # Remove myself from every predicted board: a bomb is left behind when
    # the current cell has a non-zero blast strength, a passage otherwise.
    if obs["bomb_blast_strength"][my_position]:
        replacement = constants.Item.Bomb.value
    else:
        replacement = constants.Item.Passage.value
    for b in list_boards_no_kick:
        if utility.position_is_agent(b, my_position):
            b[my_position] = replacement

    my_survivable, my_prev, _, my_survivable_with_enemy = \
        self._get_survivable_with_enemy(list_boards_no_kick, my_position,
                                        enemy_position)

    life = remaining_life(my_survivable_with_enemy, my_prev)
    for next_position in my_survivable[1]:
        my_action = self._get_direction(my_position, next_position)
        survivable_steps[my_action] = life[(1, ) + next_position]

    #
    # survivable tree if I lay bomb
    #
    if my_ammo > 0 and obs["bomb_life"][my_position] == 0:
        # A bomb can be laid here: simulate the board with it in place.
        board_with_bomb = deepcopy(obs["board"])
        curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
        board_with_bomb[my_position] = constants.Item.Bomb.value
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            my_position,
            constants.DEFAULT_BOMB_LIFE,
            my_blast_strength,
            None)
        curr_bombs_with_bomb.append(bomb)
        list_boards_with_bomb, _ = self._board_sequence(
            board_with_bomb,
            curr_bombs_with_bomb,
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=0)

        _, my_prev_with_bomb, _, my_survivable_with_bomb_enemy = \
            self._get_survivable_with_enemy(list_boards_with_bomb,
                                            my_position, enemy_position)

        life = remaining_life(my_survivable_with_bomb_enemy,
                              my_prev_with_bomb)
        survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

    print("survivable steps")
    print(survivable_steps)

    if survivable_steps:
        values = np.array(list(survivable_steps.values()))
        print(values)
        best_index = np.where(values == np.max(values))
        best_actions = np.array(list(survivable_steps.keys()))[best_index]
        best_action = random.choice(best_actions)
        print("Most survivable action", best_action)
        return best_action.value
    else:
        print("No actions: stop")
        return constants.Action.Stop.value