def ffa_evaluate(env: Pomme, episodes, verbose, visualize, stop=False):
    """
    Play several free-for-all matches on an environment whose agents are already attached.

    :param env: The pommerman environment to run
    :param episodes: How many matches to play
    :param verbose: When true, print timing and per-episode status lines
    :param visualize: When true, render the environment before every step
    :param stop: When true, block on keyboard input after every step
    :return: An array of shape (episodes, 5); column 0 is the numeric value of the
             match result (tie, win, incomplete), columns 1-4 are the final rewards.
    """
    episode_lengths = np.empty(episodes)
    # Column 0: match result, columns 1..4: one reward per agent.
    results = np.empty((episodes, 1 + 4))
    started_at = time.time()

    for episode in range(episodes):
        observations = env.reset()
        reward = []
        info = {}
        step_count = 0
        finished = False
        while not finished:
            if visualize:
                env.render()
            observations, reward, finished, info = env.step(env.act(observations))
            step_count += 1
            if stop:
                input()

        outcome = info['result']
        episode_lengths[episode] = step_count
        results[episode, 0] = outcome.value
        results[episode, 1:] = reward

        if not verbose:
            continue
        elapsed = time.time() - started_at
        print('\r{:.2f} sec > Episode {} finished with {} ({})'.format(
            elapsed, episode, outcome, reward))
        # Intermediate statistics after every tenth episode, except the very last one.
        is_last = episode == episodes - 1
        if episode % 10 == 9 and not is_last:
            ffa_print_stats(results, episode_lengths, episode + 1)

    env.close()

    if verbose:
        elapsed = time.time() - started_at
        print("Total time: {:.2f} sec".format(elapsed))
        ffa_print_stats(results, episode_lengths, episodes)

    return results
def main():
    """
    Run one rendered FFA match between four SimpleAgents and record it.

    The JSON game state is collected after the reset and after every step. If
    exactly one reward entry equals 1 (i.e. the game was not tied), the
    recording and the index of the winner are pickled to ``demonstration.p``
    in the current working directory.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add 4 agents
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id, config["game_type"]))

    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []
    # Bound up front so the check after the loop never hits an unbound name.
    winner = None

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())

        winner = reward.index(1) if 1 in reward else None
        print('Episode {} finished'.format(i_episode))
    env.close()

    # If game not tied, save demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        # The context manager guarantees the file is flushed and closed.
        with open("demonstration.p", "wb") as demo_file:
            pickle.dump(demonstration, demo_file)
# Slots 0-2 are filled with StaticAgent opponents.
agents = {}
for agent_id in range(3):
    opponent_character = config["agent"](agent_id, config["game_type"])
    agents[agent_id] = StaticAgent(opponent_character)

# Slot 3 is the human player, controlled with the arrow keys.
agent_id += 1
player_character = config["agent"](agent_id, config["game_type"])
agents[3] = PlayerAgent(player_character, "arrows")

env.set_agents(list(agents.values()))
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Keep stepping (and rendering) until the environment reports the game is over.
done = False
while True:
    env.render()
    obs, reward, done, info = env.step(env.act(obs))
    if done:
        break

env.render(close=True)
env.close()

# Print the result
print(info)
class PomFFA(gym.Env):
    """
    Single-agent gym wrapper around a Pommerman FFA match with a shaped reward.

    The wrapped ``Pomme`` environment is driven with the four agents in
    ``agent_list``; the action of the slot selected by ``player_agent_id`` is
    replaced by the action passed to :meth:`step`. Observations are the
    training slot's raw observation reduced by :meth:`preproess`, which also
    adds a ``danger`` map.
    """

    agent_list = [HoldAgent(), HoldAgent(), HoldAgent(), HoldAgent()]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):
        """
        Build the wrapped environment.

        :param env_config: Optional dict. Keys that also exist in the pommerman
            ``env_kwargs`` override them; ``is_training`` (default True)
            controls whether the agents are attached immediately.
        """
        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        # Per-episode bookkeeping used by the shaped reward.
        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))

        if not env_config or env_config.get("is_training", True):
            # initialize env twice could raise error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        """Initialise every agent in ``agent_list`` and attach them to the game."""
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])

        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        """
        Start a new episode.

        :return: The preprocessed observation of the training agent.
        """
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        # Refresh the alive list here: otherwise the value left over from an
        # episode in which the player died makes step() discard the first
        # action of the new episode.
        self.alive_agents = obs['alive']
        obs = self.preproess(obs)

        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))
        return obs

    def get_reward(self, obs, action, agent_id):
        """
        Compute the shaped reward for one step.

        :param obs: Raw (not preprocessed) observation of the training agent
        :param action: The action that was requested for this step
        :param agent_id: Board id of the training agent
        :return: The reward for this step
        """
        if len(obs["alive"]) == 1:
            # A single survivor: the winner gets whatever is missing to reach
            # a total of 1.0, everybody else gets -0.5.
            if agent_id in obs['alive']:
                return 1.0 - self.total_reward
            return -0.5

        if obs["step_count"] >= 500:
            # Game is over from time.
            return -0.5

        # Game still running, but the training agent is dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        px = [1, -1, 0, 0]
        py = [0, 0, -1, 1]

        sum_reward = 0.0
        # NOTE(review): this term is negative when the number of alive agents
        # drops — confirm that the sign is intended.
        sum_reward += 20 * (len(obs["alive"]) - self.prev_alive)
        self.prev_alive = len(obs["alive"])

        if action == 0:
            # Small penalty for standing still.
            sum_reward -= 0.1
        elif action == 5:
            # Bomb: score the four neighbouring cells.
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 2
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4
        else:
            assert (1 <= action <= 4), str(action)
            dx = x + px[action - 1]
            dy = y + py[action - 1]
            in_bounds = 0 <= dx <= 10 and 0 <= dy <= 10
            if in_bounds and obs["board"][dx][dy] == 0:
                # Reward stepping towards a new free cell, penalise revisits.
                if self.visited[dx][dy] > 0:
                    sum_reward -= 0.1
                else:
                    sum_reward += 0.3
                self.visited[dx][dy] = 1

        sum_reward = sum_reward * 1.0 / 100.0

        # Keep the accumulated shaped reward inside [-0.5, 0.8]; a step that
        # would leave the band yields no reward and is not accumulated.
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.8 or new_total_reward < -0.5:
            sum_reward = 0.0
        else:
            self.total_reward = new_total_reward
        return sum_reward

    def step(self, action):
        """
        Advance the game by one step.

        :param action: Action for the training agent; replaced by 0 when the
            agent is no longer in the alive list.
        :return: ``(observation, reward, done, {})``
        """
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)

        obs, _, done, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs

        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id not in self.alive_agents) or obs["step_count"] >= 500:
            done = True

        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        """Return a shallow copy of the training agent's entry in ``inputs``."""
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        """Overwrite the training agent's entry in ``inputs`` and return ``inputs``."""
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
        Build the observation space.

        :param env_config: The pommerman ``env_kwargs``; ``board_size`` and
            ``num_items`` fall back to 11 when falsy.
        :return: A ``spaces.Dict`` with board, bomb, flame, position, ammo,
            blast strength and danger entries.
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))

        grid = (board_size, board_size)
        board = spaces.Box(low=0, high=len(constants.Item), shape=grid)
        danger = spaces.Box(low=0, high=20, shape=grid)
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=grid)
        bomb_life = spaces.Box(low=0, high=9, shape=grid)
        flame_life = spaces.Box(low=0, high=10, shape=grid)
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))

        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength,
            "danger": danger
        })

    @staticmethod
    def preproess(obs):
        """
        Reduce a raw observation dict to the entries of the observation space.

        Removes the bookkeeping keys, converts the scalar entries to arrays,
        remaps the board codes and adds a ``danger`` map. ``obs`` is modified
        and returned; the board is remapped on a copy so the original array is
        left untouched.
        """
        for key in ("game_type", "game_env", "can_kick", "teammate", "enemies",
                    "step_count", "alive", "bomb_moving_direction"):
            del obs[key]

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        # Work on a copy: the caller only hands over a shallow copy of the
        # dict, so the array itself is still shared with the raw observations.
        board = obs['board'].copy()
        obs['board'] = board
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']

        danger = np.ndarray(shape=(11, 11), dtype=int)
        for x in range(11):
            for y in range(11):
                danger[x][y] = 10
                cell = board[x][y]
                if cell == 4:
                    # Code 4 is the only one that sets danger to 0.
                    board[x][y] = 0
                    danger[x][y] = 0
                elif cell == 3:
                    board[x][y] = 0
                elif cell == 10:
                    board[x][y] = 1
                elif cell > 10:
                    board[x][y] = 5
                elif 6 <= cell <= 8:
                    board[x][y] = 3
                elif cell == 1:
                    board[x][y] = 4

        # Lower the danger value along the row and column of every live bomb
        # to the remaining life of that bomb.
        for x in range(11):
            for y in range(11):
                if bomb_life[x][y] > 0:
                    strength = int(bomb_blast_strength[x][y] + 0.5)
                    for tx in range(max(0, x - strength + 1), min(11, x + strength)):
                        danger[tx][y] = min(danger[tx][y], bomb_life[x][y])
                    for ty in range(max(0, y - strength + 1), min(11, y + strength)):
                        danger[x][ty] = min(danger[x][ty], bomb_life[x][y])

        obs['danger'] = danger
        return obs

    def render(self):
        """Render the wrapped pommerman environment."""
        self.pomme.render()
class PomFFA(gym.Env):
    """
    Single-agent gym wrapper around a Pommerman FFA match.

    The action of the slot selected by ``player_agent_id`` is replaced by the
    action passed to :meth:`step`; the other slots act through the agents in
    ``agent_list``. The reward adds a small bonus for bombs placed next to
    board code 1 or codes above 10.
    """

    def __init__(self, env_config=None):
        """
        Build the wrapped environment.

        :param env_config: Optional dict. Keys that also exist in the pommerman
            ``env_kwargs`` override them; ``is_training`` (default True)
            controls whether the agents are attached immediately.
        """
        self.agent_list = [HoldAgent(), agents.SimpleAgent(), HoldAgent(), HoldAgent()]
        self.all_obs = None
        self.all_action = None
        self.cur_obs = None
        self.alive_agents = [10, 11, 12, 13]
        self.player_agent_id = 10
        self.total_reward = 0

        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # initialize env twice could raise error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        """Initialise every agent in ``agent_list`` and attach them to the game."""
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])

        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        """
        Start a new episode.

        :return: The preprocessed observation of the training agent.
        """
        obs = self.pomme.reset()
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs.copy()
        # Refresh the alive list here: otherwise the value left over from an
        # episode in which the player died makes step() discard the first
        # action of the new episode.
        self.alive_agents = obs['alive']
        obs = self.preproess(obs)
        self.total_reward = 0
        return obs

    def get_reward(self, obs, action, agent_id):
        """
        Compute the reward for one step.

        :param obs: Raw (not preprocessed) observation of the training agent
        :param action: The action that was requested for this step
        :param agent_id: Board id of the training agent
        :return: +0.5 / -0.5 for terminal situations, otherwise the shaped
            bomb bonus (possibly 0)
        """
        if len(obs["alive"]) == 1:
            # A single survivor: +0.5 for the winner, -0.5 for everybody else.
            return 0.5 if agent_id in obs['alive'] else -0.5

        if obs["step_count"] >= 500:
            # Game is over from time.
            return -0.5

        # Game still running, but the training agent is dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        px = [0, 1, 0, -1]
        py = [1, 0, -1, 0]

        sum_reward = 0
        if action == 5:
            # Bomb: score the four neighbouring cells.
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 1
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4

        sum_reward = sum_reward * 1.0 / 200.0

        # Keep the accumulated shaped reward inside [-0.5, 0.5]; a step that
        # would leave the band yields no reward and is not accumulated.
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.5 or new_total_reward < -0.5:
            sum_reward = 0
        else:
            self.total_reward = new_total_reward
        return sum_reward

    def step(self, action):
        """
        Advance the game by one step.

        :param action: Action for the training agent; replaced by 0 when the
            agent is no longer in the alive list.
        :return: ``(observation, reward, done, {})``
        """
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)

        obs, _, done, _ = self.pomme.step(actions)
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs.copy()

        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if self.player_agent_id not in self.alive_agents or self.cur_obs["step_count"] >= 500:
            done = True

        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        """
        Return a shallow copy of the training agent's entry in ``inputs``.

        A copy is returned because preproess() deletes keys from the dict it
        receives, while the original stays in ``self.all_obs`` and is passed
        to ``pomme.act`` on the next step.
        """
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        """Overwrite the training agent's entry in ``inputs`` and return ``inputs``."""
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
        Build the observation space.

        :param env_config: The pommerman ``env_kwargs`` (``board_size`` and
            ``num_items`` are read)
        :return: A ``spaces.Dict`` with board, bomb, flame, position, ammo and
            blast strength entries.
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        grid = (board_size, board_size)

        board = spaces.Box(low=0, high=len(constants.Item), shape=grid)
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=grid)
        bomb_life = spaces.Box(low=0, high=9, shape=grid)
        flame_life = spaces.Box(low=0, high=3, shape=grid)
        position = spaces.Box(low=0, high=board_size, shape=(2,))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1,))
        ammo = spaces.Box(low=0, high=num_items, shape=(1,))

        return spaces.Dict({"board": board,
                            "bomb_blast_strength": bomb_blast_strength,
                            "bomb_life": bomb_life,
                            "flame_life": flame_life,
                            "position": position,
                            "ammo": ammo,
                            "blast_strength": blast_strength})

    @staticmethod
    def preproess(obs):
        """
        Reduce a raw observation dict to the entries of the observation space.

        Removes the bookkeeping keys and converts the scalar entries to
        arrays. ``obs`` is modified in place and returned.
        """
        for key in ("game_type", "game_env", "can_kick", "teammate", "enemies",
                    "step_count", "alive", "bomb_moving_direction"):
            del obs[key]

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        """Render the wrapped pommerman environment."""
        self.pomme.render()
class PomFFA(gym.Env):
    """
    Single-agent gym wrapper around a Pommerman FFA match with image-like observations.

    The observation is a ``(rows, cols, 10)`` stack of planes built by
    :meth:`build_obs`; the reward is delegated to a ``Reward`` object which
    receives and returns the ``state`` dict kept by this wrapper.
    """

    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    ammo = 1
    blast_strength = 2
    state = {}

    def __init__(self, env_config=None):
        """
        Build the wrapped environment.

        :param env_config: Optional dict; ``reward`` is forwarded to ``Reward``
            and ``is_training`` (default True) controls whether the agents are
            attached immediately.
        """
        # None instead of a shared mutable default argument.
        if env_config is None:
            env_config = {}

        pomme_config = pommerman.configs.ffa_competition_env()
        self.reward = Reward(env_config.get("reward"))

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # initialize env twice could raise error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        """Initialise every agent in ``agent_list``, attach them and reset the state dict."""
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])

        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)
        self.init_state()

    def init_state(self):
        """
        Reset the per-episode state.

        A new dict is bound on the instance each time; writing into the
        class-level ``state`` dict would share it between all instances.
        """
        self.state = {
            'agent_id': self.agent_id,
            'alive': list(self.alive_agents),
            'visited': set(),
            'blast_strength': self.blast_strength,
            'ammo': self.ammo,
            'bombs': {},
        }

    def reset(self):
        """
        Start a new episode.

        :return: The stacked observation planes of the training agent.
        """
        all_obs = self.pomme.reset()
        obs = self.get_for_training_agent(all_obs)

        self.init_state()
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']

        return self.build_obs(obs, self.state)

    def step(self, action):
        """
        Advance the game by one step.

        :param action: Action for the training agent
        :return: ``(observation, reward, done, info)`` where ``info`` holds the
            raw board and the agent's blast strength
        """
        actions = self.pomme.act(self.state['all_obs'])
        actions = self.set_for_training_agent(actions, action)

        all_obs, _, _, _ = self.pomme.step(actions)
        obs = self.get_for_training_agent(all_obs)
        info = {'board': obs['board'], 'blast_strength': obs['blast_strength']}

        done = self.get_done(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']
        self.state['blast_strength'] = obs['blast_strength']
        self.state['ammo'] = obs['ammo']

        obs = self.build_obs(obs, self.state)
        return obs, reward, done, info

    def get_for_training_agent(self, inputs):
        """Return the training agent's entry in ``inputs``."""
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        """Overwrite the training agent's entry in ``inputs`` and return ``inputs``."""
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        """Return True when the training agent is dead or 800 steps have been played."""
        if self.agent_id not in obs['alive']:
            return True
        return obs['step_count'] >= 800

    def build_obs(self, obs, state):
        """
        Convert a raw observation into a ``(rows, cols, 10)`` array.

        The planes are, in order: passage, wall, wood, bomb, bonus, me, enemy,
        bomb life / 9, flame life / 3 and ammo / 12.

        :param obs: Raw observation of the training agent (not modified)
        :param state: State dict; ``agent_id`` and ``ammo`` are read
        """
        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        agent_id = state['agent_id']

        passage = np.zeros_like(board)
        wall = np.zeros_like(board)
        wood = np.zeros_like(board)
        bomb = np.zeros_like(board)
        bonus = np.zeros_like(board)
        me = np.zeros_like(board)
        enemy = np.zeros_like(board)

        for y in range(board.shape[0]):
            for x in range(board.shape[1]):
                v = board[y][x]
                if v == 0:
                    passage[y][x] = 1
                elif v == 1:
                    wall[y][x] = 1
                elif v == 2:
                    wood[y][x] = 1
                elif v == 3:
                    bomb = create_cross(bomb, (y, x), bomb_blast_strength[y][x])
                elif v == 4:
                    pass
                elif v == 6 or v == 7:
                    bonus[y][x] = 1
                elif v >= 10:
                    if v == agent_id:
                        me[y][x] = 1
                    else:
                        enemy[y][x] = 1

                # Also mark cells that carry a blast strength without showing
                # code 3 on the board.
                if bomb_blast_strength[y][x] > 0:
                    bomb = create_cross(bomb, (y, x), bomb_blast_strength[y][x])

        ammo = state['ammo'] * np.ones_like(board) / 12
        # Out-of-place division: leaves the arrays inside `obs` untouched and
        # also works when they have an integer dtype.
        bomb_life = obs['bomb_life'] / 9
        flame_life = obs['flame_life'] / 3

        planes = np.stack([
            passage, wall, wood, bomb, bonus, me, enemy, bomb_life,
            flame_life, ammo
        ])
        # Move the plane axis last: (10, rows, cols) -> (rows, cols, 10).
        return np.transpose(planes, [1, 2, 0])

    @staticmethod
    def init_observation_space(env_config):
        """
        Build the observation space.

        :param env_config: The pommerman ``env_kwargs`` (``board_size`` is read)
        :return: A ``spaces.Box`` in [0, 1] of shape (board_size, board_size, 10)
        """
        board_size = env_config['board_size']
        return spaces.Box(low=0, high=1, shape=(board_size, board_size, 10))

    @staticmethod
    def init_action_space():
        """Return a discrete action space with six actions."""
        return spaces.Discrete(6)

    def render(self):
        """Render the wrapped pommerman environment."""
        self.pomme.render()
class PomFFA(gym.Env):
    """
    Single-agent gym wrapper around a Pommerman FFA match with a sparse reward.

    The action of the slot selected by ``player_agent_id`` is replaced by the
    action passed to :meth:`step`; the other slots act through the agents in
    ``agent_list``.
    """

    agent_list = [
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):
        """
        Build the wrapped environment.

        :param env_config: Optional dict. Keys that also exist in the pommerman
            ``env_kwargs`` override them; ``is_training`` (default True)
            controls whether the agents are attached immediately.
        """
        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # initialize env twice could raise error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        """Initialise every agent in ``agent_list`` and attach them to the game."""
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])

        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        """
        Start a new episode.

        :return: The preprocessed observation of the training agent.
        """
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        # Refresh the alive list here: otherwise the value left over from an
        # episode in which the player died makes step() discard the first
        # action of the new episode.
        self.alive_agents = obs['alive']
        return self.preproess(obs)

    def get_reward(self, obs, action, agent_id):
        """
        Compute the sparse reward for one step.

        :param obs: Raw (not preprocessed) observation of the training agent
        :param action: Unused
        :param agent_id: Board id of the training agent
        :return: 1 for being the only survivor, -1 for losing, dying or
            reaching step 500, otherwise 0
        """
        if len(obs["alive"]) == 1:
            # An agent won. Give them +1, others -1.
            return 1 if agent_id in obs['alive'] else -1

        if obs["step_count"] >= 500:
            # Game is over from time. Everyone gets -1.
            return -1

        # Game running: 0 for alive, -1 for dead.
        if agent_id not in obs['alive']:
            return -1
        return 0

    def step(self, action):
        """
        Advance the game by one step.

        :param action: Action for the training agent; replaced by 0 when the
            agent is no longer in the alive list.
        :return: ``(observation, reward, done, {})``
        """
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)

        obs, _, done, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs

        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id not in self.alive_agents) or obs["step_count"] >= 500:
            done = True

        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        """
        Return a shallow copy of the training agent's entry in ``inputs``.

        A copy is returned because preproess() deletes keys from the dict it
        receives, while the original stays in ``self.all_obs`` and is passed
        to ``pomme.act`` on the next step.
        """
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        """Overwrite the training agent's entry in ``inputs`` and return ``inputs``."""
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
        Build the observation space.

        :param env_config: The pommerman ``env_kwargs``; ``board_size`` and
            ``num_items`` fall back to 11 when falsy.
        :return: A ``spaces.Dict`` with board, bomb, flame, position, ammo and
            blast strength entries.
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))

        grid = (board_size, board_size)
        board = spaces.Box(low=0, high=len(constants.Item), shape=grid)
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=grid)
        bomb_life = spaces.Box(low=0, high=9, shape=grid)
        flame_life = spaces.Box(low=0, high=3, shape=grid)
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))

        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def preproess(obs):
        """
        Reduce a raw observation dict to the entries of the observation space.

        Removes the bookkeeping keys and converts the scalar entries to
        arrays. ``obs`` is modified in place and returned.
        """
        for key in ("game_type", "game_env", "can_kick", "teammate", "enemies",
                    "step_count", "alive", "bomb_moving_direction"):
            del obs[key]

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        """Render the wrapped pommerman environment."""
        self.pomme.render()
class PomFFA(gym.Env):
    """
    Single-agent gym wrapper around a Pommerman FFA match with a pluggable reward.

    The reward is delegated to a ``Reward`` object which receives and returns
    the ``state`` dict kept by this wrapper. Observations are the training
    slot's raw observation reduced by :meth:`preproess`.
    """

    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    all_obs = None
    all_action = None
    pre_obs = None
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    state = {}

    def __init__(self, env_config=None):
        """
        Build the wrapped environment.

        :param env_config: Optional dict. Keys that also exist in the pommerman
            ``env_kwargs`` override them, ``reward`` is forwarded to
            ``Reward`` and ``is_training`` (default True) controls whether the
            agents are attached immediately.
        """
        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v
            self.reward = Reward(env_config.get("reward"))
        else:
            self.reward = Reward()

        print("Pommerman Config:", pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # initialize env twice could raise error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        """Initialise every agent in ``agent_list`` and attach them to the game."""
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])

        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        """
        Start a new episode and rebuild the reward state.

        :return: The preprocessed observation of the training agent.
        """
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)

        # Take the agent id and alive list from the class attributes instead
        # of repeating the literals, so the state always agrees with agent_id.
        self.state = {
            "prev_obs": copy.deepcopy(obs),
            "visited": set(),
            "agent_id": self.agent_id,
            "alive": list(self.alive_agents),
            "strength": 2,
            "ammo": 1,
            "bombs": {},
            "position": obs['position'],
        }

        return self.preproess(obs)

    def step(self, action):
        """
        Advance the game by one step.

        :param action: Action for the training agent
        :return: ``(observation, reward, done, {})``
        """
        actions = self.pomme.act(self.all_obs)
        actions = self.set_for_training_agent(actions, action)

        obs, _, _, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)

        reward, self.state = self.reward.get_reward(action, obs, self.state)
        done = self.get_done(obs)

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['position'] = obs['position']

        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        """
        Return a shallow copy of the training agent's entry in ``inputs``.

        A copy is returned because preproess() deletes keys from the dict it
        receives, while the original stays in ``self.all_obs`` and is passed
        to ``pomme.act`` on the next step.
        """
        order = self.agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        """Overwrite the training agent's entry in ``inputs`` and return ``inputs``."""
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        """Return True when the training agent is dead or 800 steps have been played."""
        if self.agent_id not in obs['alive']:
            return True
        return obs['step_count'] >= 800

    @staticmethod
    def init_observation_space(env_config):
        """
        Build the observation space.

        :param env_config: The pommerman ``env_kwargs`` (``board_size`` and
            ``num_items`` are read)
        :return: A ``spaces.Dict`` with board, bomb, flame, position, ammo and
            blast strength entries.
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        grid = (board_size, board_size)

        board = spaces.Box(low=0, high=len(constants.Item), shape=grid)
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=grid)
        bomb_life = spaces.Box(low=0, high=9, shape=grid)
        flame_life = spaces.Box(low=0, high=3, shape=grid)
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))

        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def init_action_space():
        """Return a discrete action space with six actions."""
        return spaces.Discrete(6)

    @staticmethod
    def preproess(obs):
        """
        Reduce a raw observation dict to the entries of the observation space.

        Removes the bookkeeping keys and converts the scalar entries to
        arrays. ``obs`` is modified in place and returned.
        """
        for key in ("game_type", "game_env", "can_kick", "teammate", "enemies",
                    "step_count", "alive", "bomb_moving_direction"):
            del obs[key]

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        """Render the wrapped pommerman environment."""
        self.pomme.render()