def get_possible_actions(self, obs, board, pos, ammo, can_kick, bombs):
    """
    List the action ids the agent may take from ``pos``.

    Ids 0-5 are the primitive actions (0 pass, 1-4 the four moves in the
    order of the offset table below, 5 bomb); ids from 6 up to
    ``len(Actions)`` are virtual actions, each included when its own
    ``is_valid(self.cur_state, obs)`` check passes.

    obs      -> observation handed to the virtual actions' validity check
    board    -> np.array of tile codes
    pos      -> tuple (row, col) of the agent
    ammo     -> int, bombs the agent can still lay
    can_kick -> bool
    bombs    -> list of bomb records understood by ``self.check_bomb`` and
                ``utility.position_is_bomb``

    Returns a list of ints.
    """
    row, col = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]
    actions = [0]

    # Offsets are listed in action order, so the move id is index + 1.
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for move, (d_row, d_col) in enumerate(offsets, start=1):
        r = row + d_row
        c = col + d_col
        if not (0 <= r < n_rows and 0 <= c < n_cols):
            continue

        threatened = self.check_bomb((r, c), bombs)
        tile = board[r, c]
        # Tile codes are presumably Pommerman Item values -- TODO confirm.
        if tile in [0, 5, 6, 7, 8] and not threatened:
            # Enterable tile that check_bomb does not flag.
            actions.append(move)
        elif tile in [3] and can_kick:
            # Tile 3 is only enterable by an agent that can kick.
            actions.append(move)
        elif tile in [0, 6, 7, 8] and utility.position_is_bomb(
                bombs, (row, col)):
            # Standing on a bomb: allow stepping onto a flagged tile anyway.
            actions.append(move)

    if ammo > 0:
        actions.append(5)

    # Drop "pass" when another option exists and the agent's own cell both
    # holds a bomb and is flagged by check_bomb.
    if (len(actions) > 1
            and utility.position_is_bomb(bombs, (row, col))
            and self.check_bomb((row, col), bombs)):
        actions.pop(0)

    actions.extend(
        idx for idx in range(6, len(Actions))
        if self.virtual_actions[idx].is_valid(self.cur_state, obs))
    return actions
def get_observation_state(self, board, pos, enemies, bomb_map):
    """
    Map the agent's surroundings to a discrete state id.

    Need just the board layout to decide everything
    board    -> np.array
    pos      -> tuple
    enemies  -> list
    bomb_map -> array-like; every cell > 0 is treated as a bomb whose
                blast strength is the cell value

    Returns the first matching code:
        0 : a bomb is on the agent's cell or one of the 8 cells around it
        4 : a bomb shares the agent's row or column
        1 : an enemy is on one of the surrounding cells
        2 : wood is on one of the surrounding cells
        3 : none of the above
    """

    def convert_bombs(bomb_map):
        '''Flatten the bomb array into a list of bomb records.'''
        locations = np.where(bomb_map > 0)
        return [
            crazy_util.dotdict({
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            }) for r, c in zip(locations[0], locations[1])
        ]

    bombs = convert_bombs(np.array(bomb_map))
    x, y = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    has_bomb = False
    has_enemy = False
    has_wood = False

    # Visit every cell of the 3x3 neighbourhood exactly once.
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            cell = (x + dx, y + dy)
            if not (0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols):
                continue
            if utility.position_is_bomb(bombs, cell):
                has_bomb = True
            if utility.position_is_wood(board, cell):
                has_wood = True
            # Test the scanned cell, not the agent's own position.
            if utility.position_is_enemy(board, cell, enemies):
                has_enemy = True

    if utility.position_is_bomb(bombs, (x, y)):
        has_bomb = True

    # Scan the agent's column and row separately so that non-square boards
    # are covered in full.
    los_bomb = (
        any(utility.position_is_bomb(bombs, (r, y)) for r in range(n_rows))
        or any(utility.position_is_bomb(bombs, (x, c))
               for c in range(n_cols)))

    if has_bomb:
        return 0
    if los_bomb:
        return 4
    if has_enemy:
        return 1
    if has_wood:
        return 2
    return 3
def get_observation_state(self, board, pos, enemies, bomb_map, bomb_life,
                          ammo):
    """
    Map the agent's surroundings to a discrete state id.

    Need just the board layout to decide everything
    board     -> np.array
    pos       -> tuple
    enemies   -> list
    bomb_map  -> array-like, passed to ``self.convert_bombs``
    bomb_life -> array-like, passed to ``self.convert_bombs``
    ammo      -> int

    The base code is the first match of:
        0 : a bomb is on the agent's cell or a 4-neighbour
        4 : ``self.check_bomb`` flags at least one 4-neighbour
        1 : an enemy is on a 4-neighbour
        2 : wood is on a 4-neighbour
        3 : none of the above
    The base code is doubled when ``ammo > 0``.
    """
    bombs = self.convert_bombs(np.array(bomb_map), np.array(bomb_life))
    x, y = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    has_bomb = False
    has_enemy = False
    has_wood = False
    los_bomb = False

    for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        cell = (x + dx, y + dy)
        if not (0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols):
            continue
        if utility.position_is_bomb(bombs, cell):
            has_bomb = True
        if utility.position_is_wood(board, cell):
            has_wood = True
        # Test the neighbouring cell, not the agent's own position.
        if utility.position_is_enemy(board, cell, enemies):
            has_enemy = True
        # Accumulate across neighbours; a later safe neighbour must not
        # erase an earlier threatened one.
        los_bomb = self.check_bomb(cell, bombs) or los_bomb

    if utility.position_is_bomb(bombs, (x, y)):
        has_bomb = True

    if has_bomb:
        state = 0
    elif los_bomb:
        state = 4
    elif has_enemy:
        state = 1
    elif has_wood:
        state = 2
    else:
        state = 3

    # NOTE(review): doubling is not a unique encoding (0 stays 0, 1 with
    # ammo equals 2 without, 2 with ammo equals 4 without). Kept as-is
    # because callers may size their tables on the 0..8 range -- confirm
    # before changing.
    if ammo > 0:
        state = 2 * state
    return state
def get_observation_state(self, board, pos, enemies, bomb_map, bomb_life,
                          ammo, can_kick):
    """
    Summarise the agent's surroundings as a ``State``.

    Need just the board layout to decide everything
    board     -> np.array
    pos       -> tuple
    enemies   -> list
    bomb_map  -> array-like, passed to ``self.convert_bombs``
    bomb_life -> array-like, passed to ``self.convert_bombs``
    ammo      -> int
    can_kick  -> forwarded unchanged into the state

    Returns ``State(has_bomb, has_enemy, is_surrounded, los_bomb,
    has_ammo, can_kick)`` where
        has_bomb      : a bomb is on a 4-neighbour or on the agent's cell,
                        or ``self.check_bomb`` flags the agent's cell
        has_enemy     : an enemy is on a 4-neighbour
        is_surrounded : more than two in-bounds 4-neighbours are rigid
        los_bomb      : ``self.check_bomb`` flags at least one 4-neighbour
        has_ammo      : ``ammo > 0``
    """
    bombs = self.convert_bombs(np.array(bomb_map), np.array(bomb_life))
    x, y = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    has_bomb = False
    has_enemy = False
    los_bomb = False
    blocks = 0  # rigid in-bounds neighbours; off-board cells are not counted

    for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        cell = (x + dx, y + dy)
        if not (0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols):
            continue
        if utility.position_is_bomb(bombs, cell):
            has_bomb = True
        if utility.position_is_rigid(board, cell):
            blocks += 1
        # Test the neighbouring cell, not the agent's own position.
        if utility.position_is_enemy(board, cell, enemies):
            has_enemy = True
        los_bomb = self.check_bomb(cell, bombs) or los_bomb

    if utility.position_is_bomb(bombs, (x, y)) or self.check_bomb(
            (x, y), bombs):
        has_bomb = True

    is_surrounded = blocks > 2
    has_ammo = ammo > 0
    return State(has_bomb, has_enemy, is_surrounded, los_bomb, has_ammo,
                 can_kick)
def get_observation_state(self, board, pos, enemies, bomb_map, ammo):
    """
    Summarise the agent's surroundings as a ``State``.

    Need just the board layout to decide everything
    board    -> np.array
    pos      -> tuple
    enemies  -> list
    bomb_map -> array-like, passed to ``self.convert_bombs``
    ammo     -> int

    Returns ``State(has_bomb, has_enemy, has_wood, los_bomb, has_ammo)``
    where
        has_bomb  : a bomb is on a 4-neighbour or on the agent's cell
        has_enemy : an enemy is on a 4-neighbour
        has_wood  : wood is on a 4-neighbour
        los_bomb  : some 4-neighbour shares a row or column with a bomb and
                    lies within that bomb's blast strength (obstacles in
                    between are not considered)
        has_ammo  : ``ammo > 0``
    """
    bombs = self.convert_bombs(np.array(bomb_map))
    x, y = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    has_bomb = False
    has_enemy = False
    has_wood = False
    los_bomb = False

    def in_blast_line(cell, bomb):
        # True when cell is on the bomb's row/column and within its reach.
        b_row, b_col = bomb['position'][0], bomb['position'][1]
        reach = bomb['blast_strength']
        same_col = abs(cell[0] - b_row) <= reach and cell[1] == b_col
        same_row = abs(cell[1] - b_col) <= reach and cell[0] == b_row
        return same_col or same_row

    for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        cell = (x + dx, y + dy)
        if not (0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols):
            continue
        if utility.position_is_bomb(bombs, cell):
            has_bomb = True
        if utility.position_is_wood(board, cell):
            has_wood = True
        # Test the neighbouring cell, not the agent's own position.
        if utility.position_is_enemy(board, cell, enemies):
            has_enemy = True
        if any(in_blast_line(cell, bomb) for bomb in bombs):
            los_bomb = True

    if utility.position_is_bomb(bombs, (x, y)):
        has_bomb = True

    has_ammo = ammo > 0
    return State(has_bomb, has_enemy, has_wood, los_bomb, has_ammo)
def get_possible_actions(self, board, pos, ammo, can_kick, bombs):
    """
    List the action ids the agent may take from ``pos``.

    0 : Pass
    1 : Up
    2 : Down
    3 : Left
    4 : Right
    5 : Bomb

    board    -> np.array of tile codes
    pos      -> tuple (row, col) of the agent
    ammo     -> int, bombs the agent can still lay
    can_kick -> bool
    bombs    -> list of bomb records understood by ``self.check_bomb`` and
                ``utility.position_is_bomb``

    Returns a list of ints.
    """
    row, col = pos
    n_rows = board.shape[0]
    n_cols = board.shape[1]
    actions = [0]

    # Offsets are listed in action order, so the move id is index + 1.
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for move, (d_row, d_col) in enumerate(offsets, start=1):
        r = row + d_row
        c = col + d_col
        if not (0 <= r < n_rows and 0 <= c < n_cols):
            continue

        threatened = self.check_bomb((r, c), bombs)
        tile = board[r, c]
        # Tile codes are presumably Pommerman Item values -- TODO confirm.
        if tile in [0, 5, 6, 7, 8] and not threatened:
            # Enterable tile that check_bomb does not flag.
            actions.append(move)
        elif tile in [3] and can_kick:
            # Tile 3 is only enterable by an agent that can kick.
            actions.append(move)
        elif tile in [0, 6, 7, 8] and utility.position_is_bomb(
                bombs, (row, col)):
            # Standing on a bomb: allow stepping onto a flagged tile anyway.
            actions.append(move)

    if ammo > 0:
        actions.append(5)

    # Drop "pass" when another option exists and the agent's own cell both
    # holds a bomb and is flagged by check_bomb.
    if (len(actions) > 1
            and utility.position_is_bomb(bombs, (row, col))
            and self.check_bomb((row, col), bombs)):
        actions.pop(0)

    return actions
def get_observation_state(self, obs):
    """
    Summarise the observation as an ``AliveState``.

    Need just the board layout to decide everything
    board -> np.array
    pos -> tuple
    enemies -> list

    Interesting keys:
        obs['board'],
        obs['position'],
        obs['teammate'],
        obs['enemies'],
        obs['bomb_blast_strength'],
        obs['bomb_life'],
        obs['ammo'],
        obs['can_kick']

    The 5x5 window centred on the agent is scanned. Cells at most one step
    away (diagonals included) form the immediate zone; the remaining cells
    of the window count as close. For bombs and enemies the nearest zone
    wins, so a farther hit never replaces an immediate one.

    Returns ``AliveState(**fields)`` built from the ordered field dict
    below.
    """
    board = obs["board"]
    bombs = self.convert_bombs(np.array(obs["bomb_blast_strength"]),
                               np.array(obs["bomb_life"]))

    d = collections.OrderedDict({
        "bomb_nearby": Proximity.NONE,
        "enemy_nearby": Proximity.NONE,
        "is_surrounded": False,
        "los_bomb": False,
        "ammo": 3 if obs['ammo'] > 3 else obs['ammo'],  # capped at 3
        "can_kick": obs['can_kick'],
        "blast_strength": BlastStrength.LOW
        if obs['blast_strength'] <= 2 else BlastStrength.HIGH,
        "enemies_alive": sum(
            1 for enemy in obs['enemies'] if enemy.value in obs['alive']),
        "nearby_enemy_has_bomb": False,
        "nearby_enemy_can_kick": False,
        "next_to_wood": False
    })

    x, y = obs['position']
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    nearby_enemy_id = None
    bomb_is_immediate = False
    enemy_is_immediate = False

    for del_x in range(-2, 3):
        for del_y in range(-2, 3):
            newX = x + del_x
            newY = y + del_y
            if not (0 <= newX < n_rows and 0 <= newY < n_cols):
                continue
            immediate_zone = abs(del_x) <= 1 and abs(del_y) <= 1

            if utility.position_is_bomb(bombs, (newX, newY)):
                if immediate_zone:
                    bomb_is_immediate = True
                    d['bomb_nearby'] = Proximity.IMMEDIATE
                elif not bomb_is_immediate:
                    # Do not downgrade an immediate bomb seen earlier.
                    d['bomb_nearby'] = Proximity.CLOSE

            if immediate_zone and board[newX, newY] == 2:
                d["next_to_wood"] = True

            if utility.position_is_enemy(board, (newX, newY),
                                         obs['enemies']):
                if immediate_zone:
                    enemy_is_immediate = True
                    nearby_enemy_id = board[newX, newY]
                    d['enemy_nearby'] = Proximity.IMMEDIATE
                elif not enemy_is_immediate:
                    # Keep the immediate enemy (and its id) if one exists.
                    nearby_enemy_id = board[newX, newY]
                    d['enemy_nearby'] = Proximity.CLOSE

            d['los_bomb'] = self.check_bomb((newX, newY),
                                            bombs) or d['los_bomb']

    # TODO why two conditions?
    if utility.position_is_bomb(bombs, (x, y)) or self.check_bomb(
            (x, y), bombs):
        d["bomb_nearby"] = Proximity.IMMEDIATE

    d["is_surrounded"] = ep.is_pos_surrounded(obs["board"], obs["position"],
                                              self.agent_value)

    if nearby_enemy_id and self.enemy_info:
        # Board ids 10, 11, 12, 13 are assumed to map to indices 0..3.
        enemy_object = self.enemy_info[nearby_enemy_id - 10]
        d["nearby_enemy_has_bomb"] = enemy_object['ammo'] > 0
        d["nearby_enemy_can_kick"] = enemy_object['can_kick'] > 0

    return AliveState(**d)
def act(self, obs, action_space, info):
    """
    Choose the action that keeps the most future positions survivable.

    obs          -> observation dict; reads 'board', 'position', 'can_kick',
                    'enemies' and 'teammate'
    action_space -> unused
    info         -> dict; reads 'list_boards_no_move', 'curr_bombs',
                    'curr_flames' and 'moving_direction'

    Every survivable action (kicks included when ``obs['can_kick']``) is
    scored by the summed number of survivable positions over the search
    horizon, and one of the best-scoring actions is returned at random.
    When nothing is survivable the fallbacks are tried in order: kick a
    bomb, step onto the teammate, step onto an enemy, step onto any
    on-board cell that is not rigid, wood or a bomb, and finally stop.

    Returns the integer ``.value`` of the chosen ``constants.Action``.
    Raises ValueError if a survivable action is not one of the six
    primitive actions.
    """

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    # List of the set of survivable time-positions at each time
    # and succeeding positions
    survivable, _, succ, _ = self._search_time_expanded_network(
        info["list_boards_no_move"], my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb = self._get_survivable_actions(
        survivable,
        obs,
        info["curr_bombs"],
        info["curr_flames"],
        enemy_mobility=0)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog: walk the kick direction to the edge
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            list_boards_with_kick, next_position = self._board_sequence(
                obs["board"],
                info["curr_bombs"],
                info["curr_flames"],
                self._search_range,
                my_position,
                my_action=my_action,
                can_kick=True,
                enemy_mobility=0)
            survivable_with_kick, _, _, _ \
                = self._search_time_expanded_network(
                    list_boards_with_kick[1:], next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]

    # (dx, dy) taken by each non-bomb action
    offsets = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
        constants.Action.Stop: (0, 0),
    }

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action not in offsets:
            raise ValueError()
        dx, dy = offsets[action]
        next_position = (x + dx, y + dy)
        n_survivable[action], _ = self._count_survivable(
            succ, 1, next_position)

    most_survivable_action = None
    if survivable_actions:
        survivable_score = dict()
        for action in n_survivable:
            survivable_score[action] = sum(n_survivable[action])
            # `verbose` is not defined in this block; presumably a
            # module-level flag -- TODO confirm.
            if verbose:
                print(action, survivable_score[action],
                      n_survivable[action])
        best_survivable_score = max(survivable_score.values())

        # Shuffle so that ties are broken at random.
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

    if most_survivable_action is not None:
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value

    # kick if possible
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
    print("Kickable", my_kick, kickable)
    if kickable:
        next_position = kickable.pop()
        action = self._get_direction(my_position, next_position)
        print("Must kick to survive", action)
        return action.value

    directions = (constants.Action.Right, constants.Action.Left,
                  constants.Action.Down, constants.Action.Up)

    # move towards a teammate if she is blocking
    for action in directions:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility._position_is_item(board, next_position, my_teammate):
            print("Must move to teammate to survive", action)
            return action.value

    # move towards an enemy
    for action in directions:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_enemy(board, next_position, my_enemies):
            print("Must move to enemy to survive", action)
            return action.value

    # move towards anywhere besides rigid, wood or a bomb
    for action in directions:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_rigid(board, next_position):
            continue
        if utility.position_is_wood(board, next_position):
            continue
        if utility.position_is_bomb(info["curr_bombs"], next_position):
            continue
        print("Try moving to survive", action)
        return action.value

    action = constants.Action.Stop
    print("Must die", action)
    # Return the integer value, like every other exit path.
    return action.value
def act(self, obs, action_space, info):
    """
    Choose the action with the longest guaranteed survival time.

    obs          -> observation dict; reads 'board', 'position', 'enemies',
                    'blast_strength', 'bomb_blast_strength', 'ammo' and
                    'bomb_life'
    action_space -> unused
    info         -> dict; reads 'list_boards_no_move', 'curr_bombs' and
                    'curr_flames'

    For every position reachable at step 1 (and for laying a bomb, when the
    agent has ammo and no bomb is already under it) the number of steps the
    agent can stay alive is computed from the survivability search. One of
    the actions with the largest step count is returned at random.

    Returns the integer ``.value`` of the chosen ``constants.Action``;
    ``Stop`` when no action was scored.
    """

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_blast_strength = obs['blast_strength']

    # First board location of every enemy that is visible on the board
    enemy_position = dict()
    for enemy in my_enemies:
        positions = np.argwhere(board == enemy.value)
        if len(positions) == 0:
            continue
        enemy_position[enemy] = tuple(positions[0])

    def steps_alive(survivable_with_enemy, prev):
        # Map (t, row, col) to the latest step reachable alive from there,
        # by propagating backwards from the end of the search range.
        life = defaultdict(int)
        for t in range(self._search_range, 0, -1):
            for position in survivable_with_enemy[t]:
                here = (t, ) + position
                if not life[here]:
                    life[here] = t
                for prev_position in prev[t][position]:
                    before = (t - 1, ) + prev_position
                    life[before] = max(life[before], life[here])
        return life

    survivable_steps = defaultdict(int)

    #
    # survivable tree in standard case
    #

    list_boards_no_kick = deepcopy(info["list_boards_no_move"])

    # remove myself: my cell becomes a bomb if one is under me, else passage
    if obs["bomb_blast_strength"][my_position]:
        vacated = constants.Item.Bomb.value
    else:
        vacated = constants.Item.Passage.value
    for b in list_boards_no_kick:
        if utility.position_is_agent(b, my_position):
            b[my_position] = vacated

    my_survivable, my_prev, _, my_survivable_with_enemy \
        = self._get_survivable_with_enemy(list_boards_no_kick, my_position,
                                          enemy_position)

    life = steps_alive(my_survivable_with_enemy, my_prev)
    for next_position in my_survivable[1]:
        my_action = self._get_direction(my_position, next_position)
        survivable_steps[my_action] = life[(1, ) + next_position]

    #
    # survivable tree if I lay bomb
    #

    if obs["ammo"] > 0 and obs["bomb_life"][my_position] == 0:
        # if I can lay a bomb
        board_with_bomb = deepcopy(obs["board"])
        curr_bombs_with_bomb = deepcopy(info["curr_bombs"])

        # lay a bomb
        board_with_bomb[my_position] = constants.Item.Bomb.value
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            my_position,
            constants.DEFAULT_BOMB_LIFE,
            my_blast_strength,
            None)
        curr_bombs_with_bomb.append(bomb)

        list_boards_with_bomb, _ = self._board_sequence(
            board_with_bomb,
            curr_bombs_with_bomb,
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=0)

        _, my_prev_with_bomb, _, my_survivable_with_bomb_enemy \
            = self._get_survivable_with_enemy(list_boards_with_bomb,
                                              my_position, enemy_position)

        life = steps_alive(my_survivable_with_bomb_enemy, my_prev_with_bomb)
        survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

    print("survivable steps")
    print(survivable_steps)

    if not survivable_steps:
        print("No actions: stop")
        return constants.Action.Stop.value

    values = np.array(list(survivable_steps.values()))
    print(values)
    best_value = max(survivable_steps.values())
    # Pick from a plain list: random.choice rejects NumPy arrays on some
    # Python 3.11 releases.
    best_actions = [
        action for action, steps in survivable_steps.items()
        if steps == best_value
    ]
    best_action = random.choice(best_actions)
    print("Most survivable action", best_action)
    return best_action.value