Example #1
import time

import numpy as np
from pommerman.envs.v0 import Pomme


def ffa_evaluate(env: Pomme, episodes, verbose, visualize, stop=False):
    """
    Evaluates the given pommerman environment (already includes the agents).

    :param env: The Pomme environment to evaluate (agents already set)
    :param episodes: The number of episodes
    :param verbose: Whether to print verbose status information
    :param visualize: Whether to visualize the execution
    :param stop: Whether to wait for input after each step
    :return: The results of the evaluation of shape (episodes, 5) where the first column [:, 0] contains the result
             of the match (tie, win, incomplete) and the remaining columns contain the individual (final) rewards.
    """

    # first element: result, additional elements: rewards
    steps = np.empty(episodes)
    results = np.empty((episodes, 1 + 4))

    start = time.time()

    # Run the episodes just like OpenAI Gym
    for i_episode in range(episodes):
        state = env.reset()
        done = False
        reward = []
        info = {}
        step = 0
        while not done:
            if visualize:
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            step += 1

            if stop:
                input()

        steps[i_episode] = step

        result = info['result']
        # save the result
        results[i_episode, 0] = result.value
        results[i_episode, 1:] = reward

        if verbose:
            delta = time.time() - start
            print('\r{:.2f} sec > Episode {} finished with {} ({})'.format(
                delta, i_episode, result, reward))

            if i_episode % 10 == 9 and i_episode != episodes - 1:
                ffa_print_stats(results, steps, i_episode + 1)

    env.close()

    if verbose:
        delta = time.time() - start
        print("Total time: {:.2f} sec".format(delta))
        ffa_print_stats(results, steps, episodes)

    return results
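
A minimal sketch of how ffa_evaluate might be driven, assuming the standard FFA config and SimpleAgent opponents (the agent setup mirrors Example #2):

# Hypothetical driver for ffa_evaluate; assumes the imports above.
from pommerman.agents import SimpleAgent
from pommerman.configs import ffa_v1_env

config = ffa_v1_env()
env = Pomme(**config["env_kwargs"])
env.set_agents([
    SimpleAgent(config["agent"](i, config["game_type"])) for i in range(4)
])
env.set_init_game_state(None)

results = ffa_evaluate(env, episodes=20, verbose=True, visualize=False)
wins = (results[:, 1] == 1).sum()  # column 0 is the result, 1..4 the final rewards
print("Agent 0 won {} of 20 episodes".format(wins))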
Example #2
import pickle

import pommerman
from pommerman.agents import SimpleAgent
from pommerman.configs import ffa_v1_env
from pommerman.envs.v0 import Pomme


def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add 4 agents (agent 3 can be swapped for a human player, see below)
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id,
                                                       config["game_type"]))

    # agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())
        if 1 in reward:
            winner = reward.index(1)
        else:
            winner = None

        print('Episode {} finished'.format(i_episode))
    env.close()

    # If the game was not tied, save the demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        with open("demonstration.p", "wb") as f:
            pickle.dump(demonstration, f)
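
The pickled demonstration can be loaded back later, e.g. for imitation learning; a minimal sketch:

# Hypothetical replay of the saved demonstration.
import pickle

with open("demonstration.p", "rb") as f:
    demonstration = pickle.load(f)

print("winner:", demonstration["winner"])
print("episode length:", len(demonstration["demo"]))  # one json_info per step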
Example #3
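# (config and env are assumed to be set up as in Example #2.)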
agents = {}
for agent_id in range(3):
    agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"]))

# Add a human agent controlled with the arrow keys
agents[3] = PlayerAgent(config["agent"](3, config["game_type"]), "arrows")

env.set_agents(list(agents.values()))
env.set_init_game_state(None)


# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)  # I don't need this
    obs, reward, done, info = env.step(actions)
env.render(close=True)
env.close()

# Print the result
print(info)
Example #4
class PomFFA(gym.Env):
    agent_list = [HoldAgent(), HoldAgent(), HoldAgent(), HoldAgent()]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice can raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preprocess(obs)
        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # Only one agent left: top the winner up to a total of 1.0, -0.5 for the rest.
            if agent_id in obs['alive']:
                return 1.0 - self.total_reward
            else:
                return -0.5

        if obs["step_count"] >= 500:
            # Game is over on time. Everyone gets -0.5.
            return -0.5

        # Game still running: shaped reward below for the living, -0.5 for the dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        blast = obs["bomb_blast_strength"]

        px = [1, -1, 0, 0]
        py = [0, 0, -1, 1]

        sum_reward = 0.0

        sum_reward += 20 * (len(obs["alive"]) - self.prev_alive)
        self.prev_alive = len(obs["alive"])

        if action == 0:
            sum_reward -= 0.1

        elif action == 5:
            # sum_reward += 1
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 2
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4
        else:
            assert (1 <= action <= 4), str(action)
            dx = x + px[action - 1]
            dy = y + py[action - 1]
            if (not (dx < 0 or dx > 10 or dy < 0
                     or dy > 10)) and obs["board"][dx][dy] == 0:
                if self.visited[dx][dy] > 0:
                    sum_reward -= 0.1
                else:
                    sum_reward += 0.3
                    self.visited[dx][dy] = 1

        sum_reward = sum_reward * 1.0 / 100.0
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.8 or new_total_reward < -0.5:
            sum_reward = 0.0
        else:
            self.total_reward = new_total_reward

        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id
                not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        danger = spaces.Box(low=0, high=20, shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=10, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        # return spaces.Dict({"board": board,
        #                     "bomb_blast_strength": bomb_blast_strength, "bomb_life": bomb_life,
        #                     "flame_life": flame_life,
        #                     "position": position, "ammo": ammo, "blast_strength": blast_strength})
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength,
            "danger": danger
        })

    @staticmethod
    def preprocess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']
        # flame_life = obs['flame_life']
        # position = obs['position']
        # ammo = obs['ammo']
        # blast_strength = obs['blast_strength']

        danger = np.full((11, 11), 10, dtype=int)  # 10 = no imminent blast

        for x in range(11):
            for y in range(11):
                if board[x][y] == 4:
                    board[x][y] = 0
                    danger[x][y] = 0
                elif board[x][y] == 3:
                    board[x][y] = 0
                elif board[x][y] == 10:
                    board[x][y] = 1
                elif board[x][y] > 10:
                    board[x][y] = 5
                elif 6 <= board[x][y] <= 8:
                    board[x][y] = 3
                elif board[x][y] == 1:
                    board[x][y] = 4

        for x in range(11):
            for y in range(11):
                if bomb_life[x][y] > 0:
                    strength = int(bomb_blast_strength[x][y] + 0.5)
                    for tx in range(max(0, x - strength + 1),
                                    min(11, x + strength)):
                        danger[tx][y] = min(danger[tx][y], bomb_life[x][y])
                    for ty in range(max(0, y - strength + 1),
                                    min(11, y + strength)):
                        danger[x][ty] = min(danger[x][ty], bomb_life[x][y])

        obs['danger'] = danger

        return obs

    def render(self):
        self.pomme.render()
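
This class follows the RLlib single-agent convention (an env_config dict plus the gym.Env API). A sketch of how it might be registered and trained with Ray RLlib; the PPO choice and config values are assumptions:

# Hypothetical RLlib hookup for PomFFA.
import ray
from ray import tune
from ray.tune.registry import register_env

register_env("pom_ffa", lambda env_config: PomFFA(env_config))

ray.init()
tune.run(
    "PPO",
    stop={"training_iteration": 100},
    config={
        "env": "pom_ffa",
        "env_config": {"max_steps": 500, "is_training": True},
        "num_workers": 2,
    },
)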
Example #5
class MultiAgent(MultiAgentEnv):
    def __init__(self):
        super(MultiAgent, self).__init__()
        self.phase = 0
        self.setup()

    def setup(self):
        agents = []
        if self.phase == 0:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 20
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 1:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 2:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 3:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 4:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 0
            config["env_kwargs"]["num_items"] = 10
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2,
                          SimpleAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        for agent_id in self.agents_index:
            agents.insert(
                agent_id,
                BaseLineAgent(config["agent"](agent_id, config["game_type"])))

        self.env.set_agents(agents)
        self.env.set_init_game_state(None)
        self.observation_space = spaces.Dict({
            "boards":
            spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
            "states":
            spaces.Box(low=-1, high=20, shape=(9, )),
        })

        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.phase = phase
        self.setup()
        self.reset()

    def step(self, actions):
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        for index in self.agents_index:
            # Fall back to action 0 (stop) if no action was provided.
            all_actions[index] = actions.get(index, 0)

        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                featurize(step_obs[0][i]),
                step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2],
                step_obs[3],
            ]

        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        obs = self.env.reset()
        return {i: featurize(obs[i]) for i in self.agents_index}
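
set_phase suggests a training curriculum. A sketch of how an RLlib on_train_result callback might advance the phase on every worker; the threshold and schedule are made up:

# Hypothetical curriculum callback for use with tune's "callbacks" config.
def on_train_result(info):
    result = info["result"]
    trainer = info["trainer"]
    if result["episode_reward_mean"] > 0.5:
        phase = min(4, result["training_iteration"] // 50)
        trainer.workers.foreach_worker(
            lambda w: w.foreach_env(lambda env: env.set_phase(phase)))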
Example #6
class MultiAgent(MultiAgentEnv):
    def __init__(self):
        super(MultiAgent, self).__init__()
        self.phase = 0
        self.next_phase = 0
        self.steps = 0
        self.last_featurize_obs = None
        self.setup()

    def featurize(self, obs):

        enemies = []
        for agent_id in self.enemies_agents_index:
            if agent_id == 0:
                enemies.append(Item.Agent0)
            if agent_id == 1:
                enemies.append(Item.Agent1)
            if agent_id == 2:
                enemies.append(Item.Agent2)
            if agent_id == 3:
                enemies.append(Item.Agent3)

        for enemy in obs["enemies"]:
            if enemy not in enemies:
                obs["board"] = ma.masked_equal(
                    obs["board"], enemy.value).filled(fill_value=0)

        board = np.copy(obs["board"])
        board[obs["position"][0], obs["position"][1]] = 0.0
        enemy_pos = np.full((11, 11), 0)
        for enemy in obs["enemies"]:
            enemy_pos = enemy_pos | ma.masked_not_equal(
                board, enemy.value).filled(fill_value=0)
            board = ma.masked_equal(board, enemy.value).filled(fill_value=0)

        wood = ma.masked_not_equal(board, 2).filled(fill_value=0)
        wood = (wood > 0).astype(np.float32)
        board = ma.masked_equal(board, 2).filled(fill_value=0)

        stone = ma.masked_not_equal(board, 1).filled(fill_value=0)
        stone = (stone > 0).astype(np.float32)
        board = ma.masked_equal(board, 1).filled(fill_value=0)
        enemy_pos = (enemy_pos > 0).astype(np.float32)

        board = ma.masked_equal(board,
                                obs["teammate"].value).filled(fill_value=0)

        flames = ma.masked_not_equal(board, 4).filled(fill_value=0)
        flames = (flames > 0).astype(np.float32)

        board = ma.masked_equal(board, 4).filled(fill_value=0)
        board = ma.masked_equal(board, 3).filled(fill_value=0)

        teammate_pos = ma.masked_not_equal(
            board, obs["teammate"].value).filled(fill_value=0)
        teammate_pos = (teammate_pos > 0).astype(np.float32)
        board = ma.masked_equal(board,
                                obs["teammate"].value).filled(fill_value=0)
        items = board.astype(np.float32)

        pos = np.full((11, 11), 0)
        pos[obs["position"][0], obs["position"][1]] = 1.0
        pos = pos.astype(np.float32)

        bomb_life = obs["bomb_life"].astype(np.float32)
        bomb_blast_strength = obs["bomb_blast_strength"].astype(np.float32)

        ammo = utility.make_np_float([obs["ammo"]])
        blast_strength = utility.make_np_float([obs["blast_strength"]])
        can_kick = utility.make_np_float([obs["can_kick"]])
        game_end = utility.make_np_float([
            (self.max_steps - self.steps) / self.max_steps
        ])

        actual_featurize_obs = {
            'boards':
            np.stack([
                enemy_pos, pos, wood, stone, items, flames, teammate_pos,
                bomb_life, bomb_blast_strength
            ],
                     axis=0),
            'states':
            np.concatenate([ammo, blast_strength, can_kick, game_end]),
        }

        if self.last_featurize_obs is None:
            featurize_obs = {
                'boards':
                np.concatenate([
                    actual_featurize_obs['boards'],
                    actual_featurize_obs['boards']
                ],
                               axis=0),
                'states':
                np.concatenate([
                    actual_featurize_obs['states'],
                    actual_featurize_obs['states']
                ]),
            }
        else:
            featurize_obs = {
                'boards':
                np.concatenate([
                    self.last_featurize_obs['boards'],
                    actual_featurize_obs['boards']
                ],
                               axis=0),
                'states':
                np.concatenate([
                    self.last_featurize_obs['states'],
                    actual_featurize_obs['states']
                ]),
            }

        self.last_featurize_obs = actual_featurize_obs
        return featurize_obs

    def setup(self):
        agents = []
        if self.phase == 0:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(
                agents_index,
                BaseLineAgent(config["agent"](agents_index,
                                              config["game_type"])))
            agents.insert(
                op_index,
                NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents':
                '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            init_state['board'] = json.dumps(board.tolist())
            agents_json = json.loads(copy.copy(init_state['agents']))
            random_pos = np.random.choice(board.shape[0], (2, 2),
                                          replace=False)
            agents_json[0]["position"] = random_pos[0].tolist()
            agents_json[1]["position"] = random_pos[1].tolist()
            init_state['agents'] = json.dumps(agents_json)
            self.env._init_game_state = init_state
            self.env.reset()

        if self.phase == 1:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(
                agents_index,
                BaseLineAgent(config["agent"](agents_index,
                                              config["game_type"])))
            agents.insert(
                op_index,
                NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents':
                '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            board[5, :] = (np.ones(11) * 2)
            agents_json = json.loads(copy.copy(init_state['agents']))
            agents_json[0]["position"] = [
                random.randint(0, 4),
                random.randint(0, 10)
            ]
            agents_json[1]["position"] = [
                random.randint(6, 10),
                random.randint(0, 10)
            ]
            init_state['agents'] = json.dumps(agents_json)
            init_state['board'] = json.dumps(board.tolist())
            self.env._init_game_state = init_state
            self.env.reset()

        self.observation_space = spaces.Dict({
            'boards':
            # channels-first: two stacked frames of 9 planes each
            spaces.Box(low=-1, high=25, shape=(18, 11, 11), dtype=np.float32),
            'states':
            spaces.Box(low=-1, high=25, shape=(8, ), dtype=np.float32)
        })

        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.next_phase = phase

    def close(self):
        self.env.close()

    def step(self, actions):
        self.steps = self.steps + 1
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        assert (len(all_actions) == len(self.agents_index) +
                len(self.enemies_agents_index))

        for index in self.agents_index:
            # Fall back to action 0 (stop) if no action was provided.
            action = actions.get(index, 0)
            assert all_actions[index] is None
            all_actions[index] = action

        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                self.featurize(step_obs[0][i]), step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2], step_obs[3]
            ]

        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        self.steps = 0
        self.phase = self.next_phase
        self.setup()
        obs = self.env.get_observations()
        return {i: self.featurize(obs[i]) for i in self.agents_index}
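
Unlike Example #5, set_phase here only records next_phase; the actual switch happens on the next reset(). A short usage sketch:

# Hypothetical phase switch; it takes effect on the following reset().
env = MultiAgent()
obs = env.reset()   # builds the phase 0 layout
env.set_phase(1)    # only records next_phase
obs = env.reset()   # now builds the phase 1 layout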
Example #7
class PomFFA(gym.Env):

    def __init__(self, env_config=None):

        self.agent_list = [HoldAgent(), agents.SimpleAgent(), HoldAgent(), HoldAgent()]
        # self.agent_list = [agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent(), agents.RandomAgent()]
        self.all_obs = None
        self.all_action = None
        self.cur_obs = None
        self.alive_agents = [10, 11, 12, 13]
        self.player_agent_id = 10
        self.total_reward = 0

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice can raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs.copy()
        obs = self.preprocess(obs)
        self.total_reward = 0
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # Only one agent left: +0.5 for the winner, -0.5 for the rest.
            if agent_id in obs['alive']:
                return 0.5
            else:
                return -0.5

        if obs["step_count"] >= 500:
            # Game is over on time. Everyone gets -0.5.
            return -0.5

        # Game still running: shaped reward below for the living, -0.5 for the dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        # blast = obs["bomb_blast_strength"]

        px = [0, 1, 0, -1]
        py = [1, 0, -1, 0]

        sum_reward = 0
        if action == 5:
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 1
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4

        sum_reward = sum_reward * 1.0 / 200.0
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.5 or new_total_reward < -0.5:
            sum_reward = 0
        else:
            self.total_reward = new_total_reward

        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        # print(obs)
        del self.all_obs
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        del self.cur_obs
        self.cur_obs = obs.copy()
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']

        if self.player_agent_id not in self.alive_agents or self.cur_obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)

        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        # print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2,))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1,))
        ammo = spaces.Box(low=0, high=num_items, shape=(1,))
        return spaces.Dict({"board": board, "bomb_blast_strength": bomb_blast_strength, "bomb_life": bomb_life,
                            "flame_life": flame_life,
                            "position": position, "ammo": ammo, "blast_strength": blast_strength})

    @staticmethod
    def preprocess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
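
A minimal smoke test for this wrapper, assuming HoldAgent is defined as in the neighboring examples:

# Hypothetical random rollout.
env = PomFFA()
obs = env.reset()
done = False
total = 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    total += reward
print("episode reward:", total)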
Example #8
class PomFFA(gym.Env):
    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    ammo = 1
    blast_strength = 2
    state = {}

    def __init__(self, env_config=None):
        env_config = env_config or {}  # avoid a mutable default argument
        pomme_config = pommerman.configs.ffa_competition_env()
        self.reward = Reward(env_config.get("reward"))

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice can raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)
        self.init_state()

    def init_state(self):
        self.state['agent_id'] = self.agent_id
        self.state['alive'] = self.alive_agents
        self.state['visited'] = set()
        self.state['blast_strength'] = self.blast_strength
        self.state['ammo'] = self.ammo
        self.state["bombs"] = {}

    def reset(self):
        all_obs = self.pomme.reset()
        obs = self.get_for_training_agent(all_obs)
        self.init_state()

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']

        obs = self.build_obs(obs, self.state)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.state['all_obs'])
        actions = self.set_for_training_agent(actions, action)

        all_obs, _, _, _ = self.pomme.step(actions)
        obs = self.get_for_training_agent(all_obs)
        info = {'board': obs['board'], 'blast_strength': obs['blast_strength']}
        done = self.get_done(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']
        self.state['blast_strength'] = obs['blast_strength']
        self.state['ammo'] = obs['ammo']

        obs = self.build_obs(obs, self.state)
        return obs, reward, done, info

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    def build_obs(self, obs, state):
        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']
        flame_life = obs['flame_life']
        agent_id = state['agent_id']
        ammo = state['ammo']
        passage = np.zeros_like(board)
        wall = np.zeros_like(board)
        wood = np.zeros_like(board)
        bomb = np.zeros_like(board)
        bonus = np.zeros_like(board)
        me = np.zeros_like(board)
        enemy = np.zeros_like(board)
        for y in range(board.shape[0]):
            for x in range(board.shape[1]):
                v = board[y][x]
                if v == 0:
                    passage[y][x] = 1
                elif v == 1:
                    wall[y][x] = 1
                elif v == 2:
                    wood[y][x] = 1
                elif v == 3:
                    bomb = create_cross(bomb, (y, x),
                                        bomb_blast_strength[y][x])
                elif v == 4:
                    pass
                elif v == 6 or v == 7:
                    bonus[y][x] = 1
                elif v >= 10:
                    if v == agent_id:
                        me[y][x] = 1
                    else:
                        enemy[y][x] = 1
                    if bomb_blast_strength[y][x] > 0:
                        bomb = create_cross(bomb, (y, x),
                                            bomb_blast_strength[y][x])

        ammo = ammo * np.ones_like(board) / 12
        bomb_life /= 9
        flame_life /= 3
        board = np.transpose(
            np.stack([
                passage, wall, wood, bomb, bonus, me, enemy, bomb_life,
                flame_life, ammo
            ]), [1, 2, 0])
        return board

    @staticmethod
    def init_observation_space(env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']

        board = spaces.Box(
            low=0, high=1,
            shape=(board_size, board_size, 10)
        )  # passage, wall, wood, bomb, bonus, me, enemy, bomb_life, flame_life, ammo
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        ammo = spaces.Box(low=0,
                          high=num_items,
                          shape=(board_size, board_size))
        # return spaces.Dict({"board": board, "bomb_life": bomb_life, "flame_life": flame_life,"ammo": ammo})
        return board

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    def render(self):
        self.pomme.render()
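
create_cross is not shown in this listing; a plausible minimal implementation, assuming it marks a bomb's blast cross on the given plane:

# Hypothetical helper; the real create_cross may differ.
def create_cross(plane, center, strength):
    y, x = center
    s = int(strength)
    size = plane.shape[0]
    # The blast reaches strength - 1 cells in each of the four directions.
    for d in range(-s + 1, s):
        if 0 <= y + d < size:
            plane[y + d][x] = 1
        if 0 <= x + d < size:
            plane[y][x + d] = 1
    return plane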
Example #9
class Pomme_v0(MultiAgentEnv):
    '''
    A wrapped Pommerman v0 environment for use with Ray RLlib. The v0 environment is the base environment used in
    the NIPS'18 competition. Contrary to v1 it does not collapse walls, and it does not allow radio communication
    between agents (as v2 does).

    Agents are identified by (string) agent IDs: `AGENT_IDS`
    (Note that these "agents" here are not to be confused with RLlib agents.)
    '''
    def __init__(self, config=None):
        '''
        Initializes the Pommerman environment and adds dummy agents as expected by `Pomme`.

        Args:
            config (dict): A config defining the game mode. Options include FFA mode, team (2v2) and team radio (2v2).
            See pommerman's config.py and docs for more details. Defaults to the team competition config.
        '''
        if config is None:
            config = pommerman_cfg.team_competition_env()
        self.pomme = Pomme(**config['env_kwargs'])
        self.observation_space = dict  # placeholder; raw observation dicts are returned
        self.action_space = self.pomme.action_space
        self.agent_names = AGENT_IDS
        agent_list = []
        for agent_id in range(4):
            agent_list.append(
                agents.BaseAgent(config["agent"](agent_id,
                                                 config["game_type"])))
        self.pomme.set_agents(agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        """
        Resets the env and returns observations from ready agents.

        Returns:
            obs (dict): New observations for each ready agent.
        """
        obs_list = self.pomme.reset()
        #return {key: featurize(val) for key, val in to_dict(obs_list).items()}
        return to_dict(obs_list)

    def step(self, action_dict):
        """
        Returns observations from ready agents.
        The returns are dicts mapping from agent_id strings to values. The number of agents in the env can vary over
        time.

        Returns:
            obs (dict): New observations for each ready agent.
            rewards (dict): Reward values for each ready agent. If the episode is just started, the value will be zero.
            dones (dict): Done values for each ready agent. The key "__all__" is used to indicate the end of the game.
            infos (dict): Info values for each ready agent.
        """
        # default actions since Pommerman env expects actions even if agent is dead
        actions = {'agent_0': 0, 'agent_1': 0, 'agent_2': 0, 'agent_3': 0}
        # update actions with the ones returned from the policies
        actions.update(action_dict)
        # perform env step (expects a list)
        obs, rewards, done, info = self.pomme.step(list(actions.values()))
        # build 'dones' dictionary, key __all__ indicates env termination
        dones = {'__all__': done}
        # fetch all
        done_agents = to_dict(
            [not agent.is_alive for agent in self.pomme._agents])
        # filter done dictionary to only return agents which are still alive
        # -> apparently this is how rllib determines when agents "die"
        dones.update({key: val for key, val in done_agents.items() if not val})
        # turn info dict into dictionary with agent IDs as keys
        infos = {
            AGENT_IDS[i]: {info_k: info_v
                           for info_k, info_v in info.items()}
            for i in range(NUM_PLAYERS)
        }
        return to_dict(obs), to_dict(rewards), dones, infos
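
AGENT_IDS, NUM_PLAYERS and to_dict are module-level helpers not shown here; a plausible reconstruction from the call sites:

# Hypothetical helpers matching how Pomme_v0 uses them.
NUM_PLAYERS = 4
AGENT_IDS = ['agent_{}'.format(i) for i in range(NUM_PLAYERS)]


def to_dict(values):
    """Map a per-agent list [v0, v1, v2, v3] to {'agent_0': v0, ...}."""
    return {AGENT_IDS[i]: values[i] for i in range(NUM_PLAYERS)}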
Example #10
class PomFFA(gym.Env):
    agent_list = [
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice can raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preprocess(obs)
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won. Give them +1, others -1.
            if agent_id in obs['alive']:
                return 1
            else:
                return -1

        if obs["step_count"] >= 500:
            # Game is over from time. Everyone gets -1.
            return -1

        # Game running: 0 for alive, -1 for dead.
        if agent_id not in obs['alive']:
            return -1
        #
        # x, y = obs["position"]
        # blast = obs["bomb_blast_strength"]
        #
        # for w in range(11):
        #     if blast[x][w] > int(math.fabs(w-y)):
        #         return -10
        #
        #     if blast[w][y] > int(math.fabs((w-x))):
        #         return -10

        return 0

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        # print(obs)

        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id
                not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def preprocess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
Example #11
class PomFFA(gym.Env):
    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    all_obs = None
    all_action = None
    pre_obs = None
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    state = {}

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v
            self.reward = Reward(env_config.get("reward"))
        else:
            self.reward = Reward()

        print("Pommerman Config:", pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice can raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        state = {
            "prev_obs": None,
            "visited": set(),
            "agent_id": 10,
            "alive": [10, 11, 12, 13],
            "strength": 2,
            "ammo": 1,
            "bombs": {},
        }
        state['prev_obs'] = copy.deepcopy(obs)
        state['position'] = obs['position']
        self.state = state
        obs = self.preprocess(obs)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        actions = self.set_for_training_agent(actions, action)

        obs, rewards, _, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)
        done = self.get_done(obs)
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['position'] = obs['position']
        obs = self.preprocess(obs)

        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    @staticmethod
    def init_observation_space(env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    @staticmethod
    def preprocess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        return obs

    def render(self):
        self.pomme.render()
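
The Reward class used here and in Example #8 is not part of this listing; judging from the call sites it exposes get_reward(action, obs, state) -> (reward, new_state). A minimal stub with those semantics:

# Hypothetical Reward stub; the real shaping logic is not shown.
class Reward:
    def __init__(self, config=None):
        self.config = config or {}

    def get_reward(self, action, obs, state):
        """Return (reward, updated_state) for the training agent."""
        if state['agent_id'] not in obs['alive']:
            return -1.0, state
        return 0.0, state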