def evaluate_agent(agent, config, selected_labels=None, agent_id=0, iterations=100, plot=True):
    """Run `iterations` evaluation episodes for `agent` and collect statistics.

    Returns:
        tuple: (info, rewards, lengths, elapsed) where `rewards` and `lengths`
        are (iterations, 4) arrays and `elapsed` is wall-clock seconds.
    """
    env = Pomme(**config["env_kwargs"])
    episode_infos = []
    reward_matrix = np.zeros((iterations, 4))
    length_matrix = np.zeros((iterations, 4))
    # EvaluatorAgent instances get extra per-run / per-episode bookkeeping hooks.
    is_evaluator = isinstance(agent, EvaluatorAgent)
    if is_evaluator:
        agent.reset_run()
    t0 = time.time()
    for episode in tqdm(range(iterations)):
        ep_info, ep_reward, ep_lengths = run_episode(agent, config, env, agent_id)
        episode_infos.append(ep_info)
        reward_matrix[episode] = ep_reward
        length_matrix[episode] = ep_lengths
        if is_evaluator:
            agent.end_episode()
    if plot:
        plot_statistics(agent, episode_infos, selected_labels, agent_id, iterations)
    return episode_infos, reward_matrix, length_matrix, time.time() - t0
def __init__(self, env_config=None):
    """Build the wrapped FFA competition env, applying `env_config` overrides.

    Only keys already present in the base env kwargs are honoured.
    """
    pomme_config = pommerman.configs.ffa_competition_env()
    kwargs = pomme_config['env_kwargs']
    if env_config:
        overrides = {k: v for k, v in env_config.items() if k in kwargs}
        kwargs.update(overrides)
    print("pomme_config: ")
    print(kwargs)
    self.pomme = Pomme(**kwargs)
    self.observation_space = self.init_observation_space(kwargs)
    self.action_space = self.pomme.action_space
    # Per-episode bookkeeping: cumulative reward, survivor count, visit map.
    self.total_reward = 0
    self.prev_alive = 4
    self.visited = np.zeros(shape=(11, 11))
    # initialize env twice could raise error here.
    if not env_config or env_config.get("is_training", True):
        self.init(pomme_config)
def __init__(self, env_config=None):
    """Set up a 4-player FFA env whose learning agent occupies slot id 10."""
    self.agent_list = [HoldAgent(), agents.SimpleAgent(), HoldAgent(), HoldAgent()]
    # Rolling episode state; populated during reset/step.
    self.all_obs = None
    self.all_action = None
    self.cur_obs = None
    self.alive_agents = [10, 11, 12, 13]
    self.player_agent_id = 10
    self.total_reward = 0
    pomme_config = pommerman.configs.ffa_competition_env()
    base_kwargs = pomme_config['env_kwargs']
    if env_config:
        for key, value in env_config.items():
            if key in base_kwargs:
                base_kwargs[key] = value
    self.pomme = Pomme(**base_kwargs)
    self.observation_space = self.init_observation_space(base_kwargs)
    self.action_space = self.pomme.action_space
    # initialize env twice could raise error here.
    if not env_config or env_config.get("is_training", True):
        self.init(pomme_config)
def makeTrainingObservation():
    """Create a Pomme env populated with `num_players` TrainingAgents.

    `config` and `num_players` are read from the enclosing scope.
    """
    env = Pomme(**config["env_kwargs"])
    roster = [
        TrainingAgent(config["agent"](slot, config["game_type"]))
        for slot in range(num_players)
    ]
    env.set_agents(roster)
    env.set_init_game_state(None)
    return env
def env_for_players(self):
    """Build an FFA env: DQN trainee in slot 0, human player, two randoms."""
    config = ffa_v0_fast_env(30)
    env = Pomme(**config["env_kwargs"])

    def make(cls, slot):
        # Helper: wrap a pommerman agent config for the given slot.
        return cls(config["agent"](slot, config["game_type"]))

    roster = [
        make(DQN, 0),
        make(PlayerAgent, 1),
        make(RandomAgent, 2),
        make(RandomAgent, 3),
    ]
    env.set_agents(roster)
    env.set_training_agent(roster[0].agent_id)  # training_agent is only dqn agent
    env.set_init_game_state(None)
    return env
def _thunk():
    """Env factory: `num_players` TrainingAgents plus one scripted SimpleAgent.

    `config` and `num_players` are read from the enclosing scope.
    """
    env = Pomme(**config["env_kwargs"])
    roster = [
        TrainingAgent(config["agent"](slot, config["game_type"]))
        for slot in range(num_players)
    ]
    # The scripted opponent takes the next free slot after the trainees.
    roster.append(SimpleAgent(config["agent"](num_players, config["game_type"])))
    env.set_agents(roster)
    env.set_init_game_state(None)
    return env
def __init__(self, env_config=None):
    """Wrap the FFA competition env with a configurable reward function.

    Args:
        env_config (dict | None): Optional settings. Recognised keys:
            "reward" (passed to Reward) and "is_training" (controls
            whether the env is initialised immediately).
    """
    # Bug fix: the original signature used a mutable default (`env_config={}`),
    # which is shared across all calls. Use None as the sentinel instead.
    if env_config is None:
        env_config = {}
    pomme_config = pommerman.configs.ffa_competition_env()
    self.reward = Reward(env_config.get("reward"))
    self.pomme = Pomme(**pomme_config['env_kwargs'])
    self.observation_space = self.init_observation_space(
        pomme_config['env_kwargs'])
    self.action_space = self.pomme.action_space
    if not env_config or env_config.get("is_training", True):
        # initialize env twice could raise error here.
        self.init(pomme_config)
def make_env(self, config):
    """Build a Pomme env in which this agent occupies slot `self.agent_id`
    and every other slot is filled by a scripted SimpleAgent."""
    env = Pomme(**config["env_kwargs"])
    roster = [
        self if slot == self.agent_id
        else SimpleAgent(config["agent"](slot, config["game_type"]))
        for slot in range(NUM_AGENTS)
    ]
    env.set_agents(roster)
    env.set_init_game_state(None)
    return env
def set_pommerman_env(agent_id=0):
    """Create a seeded FFA env: one trainable DQN agent, three SimpleAgents.

    Args:
        agent_id (int): Slot (0-3) assigned to the DQN agent.
    """
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    # Fixed seeds keep board layouts reproducible across runs.
    np.random.seed(0)
    env.seed(0)
    roster = []
    for slot in range(4):
        slot_cfg = config["agent"](slot, config["game_type"])
        roster.append(DQN(slot_cfg) if slot == agent_id else SimpleAgent(slot_cfg))
    env.set_agents(roster)
    # training_agent is only dqn agent
    env.set_training_agent(roster[agent_id].agent_id)
    env.set_init_game_state(None)
    return env
def get_env():
    """FFA env with a DQN trainee in slot 0 and SimpleAgents in slots 1-3."""
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    trainee_slot = 0
    roster = [
        DQN(config["agent"](slot, config["game_type"])) if slot == trainee_slot
        else SimpleAgent(config["agent"](slot, config["game_type"]))
        for slot in range(4)
    ]
    env.set_agents(roster)
    env.set_training_agent(roster[trainee_slot].agent_id)
    env.set_init_game_state(None)
    return env
def main():
    """Play one episode of four SimpleAgents and, if somebody won,
    pickle the full per-step state trace as a demonstration."""
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)
    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])
    # Fill all four slots with scripted agents.
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id, config["game_type"]))
    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)
    demo = []
    winner = None
    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())
        # A reward of 1 marks the winning slot; a tie yields no winner.
        winner = reward.index(1) if 1 in reward else None
        print('Episode {} finished'.format(i_episode))
    env.close()
    # If game not tied, save demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        # Bug fix: the original passed an unclosed open() directly to
        # pickle.dump, leaking the file handle; use a context manager.
        with open("demonstration.p", "wb") as fp:
            pickle.dump(demonstration, fp)
def __init__(self, config=None):
    '''
    Initializes the Pommerman environment and adds Dummy Agents as
    expected by `Pomme`.

    Args:
        config (dict): A config defining the game mode. Defaults to the
            team (2v2) competition config. Options include FFA mode, team
            (2v2) and team radio (2v2). See pommerman's config.py and
            docs for more details.
    '''
    # Bug fix: the original default `config=pommerman_cfg.team_competition_env()`
    # was evaluated once at definition time, so every no-arg instance shared
    # (and could mutate) the same config dict. Build a fresh one per call.
    if config is None:
        config = pommerman_cfg.team_competition_env()
    self.pomme = Pomme(**config['env_kwargs'])
    # NOTE(review): this assigns the builtin `dict` type, not a gym space —
    # kept as-is since callers may rely on it; confirm intent.
    self.observation_space = dict
    self.action_space = self.pomme.action_space
    self.agent_names = AGENT_IDS
    # Four placeholder agents; actual policies act through step(), not these.
    agent_list = [
        agents.BaseAgent(config["agent"](agent_id, config["game_type"]))
        for agent_id in range(4)
    ]
    self.pomme.set_agents(agent_list)
    self.pomme.set_init_game_state(None)
def __init__(self, env_config=None):
    """Wrap the FFA competition env; `env_config` may override env kwargs
    and supply a custom reward specification under the "reward" key."""
    pomme_config = pommerman.configs.ffa_competition_env()
    kwargs = pomme_config['env_kwargs']
    if env_config:
        for key, value in env_config.items():
            if key in kwargs:
                kwargs[key] = value
        self.reward = Reward(env_config.get("reward"))
    else:
        self.reward = Reward()
    print("Pommerman Config:", kwargs)
    self.pomme = Pomme(**kwargs)
    self.observation_space = self.init_observation_space(kwargs)
    self.action_space = self.pomme.action_space
    # initialize env twice could raise error here.
    if not env_config or env_config.get("is_training", True):
        self.init(pomme_config)
def setup(self):
    """Configure the 2v2 curriculum environment for the current phase.

    Each phase tweaks board density and the pair of scripted opponents
    (slots 0 and 2); our BaseLineAgents always occupy slots 1 and 3.

    Fixes over the original: the five near-identical phase branches are
    collapsed into a lookup table, and a stray no-op `spaces.Box(...)`
    expression (result discarded) has been removed.
    """
    # Per-phase (num_wood, num_items, num_rigid, opponent-in-slot-0,
    # opponent-in-slot-2). Phases 2 and 3 were identical in the original
    # and remain so intentionally.
    phase_table = {
        0: (2, 2, 20, SuicidalAgent, NoDoAgent),
        1: (2, 2, 36, SuicidalAgent, NoDoAgent),
        2: (2, 2, 36, NoDoAgent, NoDoAgent),
        3: (2, 2, 36, NoDoAgent, NoDoAgent),
        4: (0, 10, 36, SuicidalAgent, SimpleAgent),
    }
    agents = []
    if self.phase in phase_table:
        num_wood, num_items, num_rigid, opp0_cls, opp1_cls = phase_table[self.phase]
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = num_wood
        config["env_kwargs"]["num_items"] = num_items
        config["env_kwargs"]["num_rigid"] = num_rigid
        # insert(2) on a one-element list appends, matching the original.
        agents.insert(0, opp0_cls(config["agent"](0, config["game_type"])))
        agents.insert(2, opp1_cls(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()
    # Our trainable agents slot into indices 1 and 3 between the opponents.
    for agent_id in self.agents_index:
        agents.insert(
            agent_id,
            BaseLineAgent(config["agent"](agent_id, config["game_type"])))
    self.env.set_agents(agents)
    self.env.set_init_game_state(None)
    self.observation_space = spaces.Dict({
        "boards": spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
        "states": spaces.Box(low=-1, high=20, shape=(9, )),
    })
    self.action_space = self.env.action_space
def main():
    """Train (or test) a Tensorforce PPO agent against three SimpleAgents,
    periodically checkpointing the model and pickling reward history."""
    # Print all possible environments in the Pommerman registry
    # Instantiate the environment
    DETERMINISTIC = False
    VISUALIZE = False
    if args.test:
        # Test mode: act greedily and render the board.
        DETERMINISTIC = True
        VISUALIZE = True
    config = ffa_competition_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)
    # Create a Proximal Policy Optimization agent
    with open('ppo.json', 'r') as fp:
        agent = json.load(fp=fp)
    with open('mlp2_lstm_network.json', 'r') as fp:
        network = json.load(fp=fp)
    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=dict(type='int', num_actions=env.action_space.n),
            network=network
        )
    )
    # Add 3 random agents
    agents = []
    for agent_id in range(3):
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))
    # Add TensorforceAgent
    agent_id += 1
    agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)
    # Instantiate and run the environment for 5 episodes.
    if VISUALIZE:
        wrapped_env = WrappedEnv(env, True)
    else:
        wrapped_env = WrappedEnv(env)
    runner = Runner(agent=agent, environment=wrapped_env)
    rewards = []
    episodes = []

    def episode_finished(r):
        # Runner callback invoked after every episode; returning True
        # tells the runner to keep going.
        nonlocal episodes
        nonlocal rewards
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep, reward=r.episode_rewards[-1]))
        if r.episode % 1000 == 0:
            # Periodic checkpoint: save the model and flush the buffered
            # reward/episode history into the pickle file.
            agent.save_model(('./{}').format(EXPERIMENT_NAME), False)
            try:
                prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
                prev_len = len(prev_data[0])
                prev_data[0].extend(rewards)
                rewards = []
                prev_data[1].extend(episodes)
                episodes = []
                pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
            except (OSError, IOError) as e:
                # No previous history file yet: start a fresh one.
                pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))
        if r.episode_rewards[-1] >= 5:
            print()
            print()
            print()
            print("WINNER WINNER CHICKEN DINNER")
        episodes.append(r.episode)
        rewards.append(r.episode_rewards[-1])
        return True

    # Restore, Train, and Save Model
    if args.test or args.resume:
        # If test, change settings and restore model
        agent.restore_model('./','PPO_K_someS_500batch_biggerreward_99dis')
    runner.run(episodes=EPISODES, max_episode_timesteps=2000, episode_finished=episode_finished, deterministic=False)
    if not args.test:
        agent.save_model(('./{}').format(EXPERIMENT_NAME), False)
    print("Stats: ", runner.episode_rewards[-5:], runner.episode_timesteps[-5:])
    #Dump reward values
    try:
        prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
        prev_len = len(prev_data[0])
        prev_data[0].extend(rewards)
        prev_data[1].extend(episodes)
        print(episodes)
        pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
    except (OSError, IOError) as e:
        pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))
    try:
        runner.close()
    except AttributeError as e:
        # Older Runner versions have no close(); ignore.
        pass
from ray.rllib.agents.ppo import PPOTrainer from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy from ray.rllib.models import ModelCatalog import pommerman from pommerman import agents from pommerman import configs from pommerman import constants from pommerman.envs.v0 import Pomme from models.third_model import ActorCriticModel from envs import v0 ray.init(num_cpus=5, num_gpus=1) env_config = configs.phase_0_team_v0_env() env = Pomme(**env_config['env_kwargs']) act_space = env.action_space ModelCatalog.register_custom_model("torch_conv", ActorCriticModel) agent_names = ["ppo_agent_1", "ppo_agent_2"] ppo_agent = PPOTrainer(config={ "env_config": { "agent_names": agent_names, "env_id": "Mines-PommeTeam-v0", "phase": 0 }, "num_workers": 0, "num_gpus": 0, "multiagent": { "policies": { "ppo_policy": (PPOTFPolicy, obs_space, act_space, {
def setup(self):
    """Build a 2-player curriculum env for the current phase.

    Both phases randomly assign which of slots 0/1 is ours vs the
    opponent's, then construct a hand-crafted initial game state:
    phase 0 uses an empty board with random agent positions; phase 1
    adds a horizontal rigid wall on row 5 and spawns the two agents on
    opposite sides of it.
    """
    agents = []
    if self.phase == 0:
        # Randomly decide which slot (0 or 1) is our agent.
        arr = [0, 1]
        random.shuffle(arr)
        agents_index = arr.pop()
        op_index = arr.pop()
        self.agents_index = [agents_index]
        self.enemies_agents_index = [op_index]
        self.max_steps = 200
        config = ffa_v0_fast_env()
        config["env_kwargs"]["max_steps"] = self.max_steps
        agents.insert(
            agents_index,
            BaseLineAgent(config["agent"](agents_index, config["game_type"])))
        agents.insert(
            op_index,
            NoDoAgent(config["agent"](op_index, config["game_type"])))
        self.env = Pomme(**config["env_kwargs"])
        self.env.set_agents(agents)
        # Serialized initial state; positions below get overwritten.
        init_state = {
            'board_size': '11',
            'step_count': '0',
            'board': '',
            'agents': '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
            'bombs': '[]',
            'flames': '[]',
            'items': '[]',
            'intended_actions': '[0, 0]'
        }
        # Empty 11x11 board (all passage cells).
        board = np.full((11, 11), 0)
        init_state['board'] = json.dumps(board.tolist())
        agents_json = json.loads(copy.copy(init_state['agents']))
        # Distinct random coordinates for the two agents.
        random_pos = np.random.choice(board.shape[0], (2, 2), replace=False)
        agents_json[0]["position"] = random_pos[0].tolist()
        agents_json[1]["position"] = random_pos[1].tolist()
        init_state['agents'] = json.dumps(agents_json)
        self.env._init_game_state = init_state
        self.env.reset()
    if self.phase == 1:
        arr = [0, 1]
        random.shuffle(arr)
        agents_index = arr.pop()
        op_index = arr.pop()
        self.agents_index = [agents_index]
        self.enemies_agents_index = [op_index]
        self.max_steps = 200
        config = ffa_v0_fast_env()
        config["env_kwargs"]["max_steps"] = self.max_steps
        agents.insert(
            agents_index,
            BaseLineAgent(config["agent"](agents_index, config["game_type"])))
        agents.insert(
            op_index,
            NoDoAgent(config["agent"](op_index, config["game_type"])))
        self.env = Pomme(**config["env_kwargs"])
        self.env.set_agents(agents)
        init_state = {
            'board_size': '11',
            'step_count': '0',
            'board': '',
            'agents': '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
            'bombs': '[]',
            'flames': '[]',
            'items': '[]',
            'intended_actions': '[0, 0]'
        }
        board = np.full((11, 11), 0)
        # Wall of value-2 cells across row 5 splits the board in two.
        board[5, :] = (np.ones(11) * 2)
        agents_json = json.loads(copy.copy(init_state['agents']))
        # Agent 0 spawns above the wall, agent 1 below it.
        agents_json[0]["position"] = [
            random.randint(0, 4), random.randint(0, 10)
        ]
        agents_json[1]["position"] = [
            random.randint(6, 10), random.randint(0, 10)
        ]
        init_state['agents'] = json.dumps(agents_json)
        init_state['board'] = json.dumps(board.tolist())
        self.env._init_game_state = init_state
        self.env.reset()
    self.observation_space = spaces.Dict({
        'boards':
        spaces.Box(low=-1, high=25, shape=(11, 11, 18), dtype=np.float32),
        'states':
        spaces.Box(low=-1, high=25, shape=(8, ), dtype=np.float32)
    })
    self.action_space = self.env.action_space
teammate = teammate.value else: teammate = -1 teammate = make_np_float([teammate]) enemies = obs["enemies"] enemies = [e.value for e in enemies] if len(enemies) < 3: enemies = enemies + [-1]*(3 - len(enemies)) enemies = make_np_float(enemies) return np.concatenate((board, bomb_blast_strength, bomb_life, position, ammo, blast_strength, can_kick, teammate, enemies)) # Instantiate the environment config = ffa_v0_fast_env() env = Pomme(**config["env_kwargs"]) env.action_space.n # Add 3 random agents agents = {} for agent_id in range(3): agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"])) # Add human agent agent_id += 1 agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows") env.set_agents(list(agents.values())) env.set_init_game_state(None)
def main(args):
    """Train a Tensorforce PPO agent against three SimpleAgents, restoring
    from and saving to `models/<version>/`, then print win statistics.

    Args:
        args: Namespace with `episodes` (int) and `visualize` (bool).
    """
    version = 'v1'
    episodes = args.episodes
    visualize = args.visualize
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)
    # Conv tower over the 11x11x12 observation, then two dense layers.
    agent = PPOAgent(
        states=dict(type='float', shape=(11, 11, 12)),
        actions=dict(type='int', num_actions=env.action_space.n),
        network=[
            # (9, 9, 12)
            dict(type='conv2d', size=12, window=3, stride=1),
            # (7, 7, 8)
            dict(type='conv2d', size=8, window=3, stride=1),
            # (5, 5, 4)
            dict(type='conv2d', size=4, window=3, stride=1),
            # (100)
            dict(type='flatten'),
            dict(type='dense', size=64, activation='relu'),
            dict(type='dense', size=16, activation='relu'),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-4))
    # Resume from the latest checkpoint if one exists.
    if os.path.exists(os.path.join('models', version, 'checkpoint')):
        agent.restore_model(directory=os.path.join('models', version))
    # Three scripted opponents plus the trainable agent in the last slot.
    agents = []
    for agent_id in range(3):
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))
    agent_id += 1
    agents.append(
        TensorforceAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)
    wrapped_env = WrappedEnv(env, agent, visualize)
    runner = Runner(agent=agent, environment=wrapped_env)
    # Bug fix: the original `except Exception as e: raise e` was a no-op
    # that only mangled the traceback; try/finally alone guarantees the
    # model is saved whether or not training raises.
    try:
        runner.run(episodes=episodes, max_episode_timesteps=100)
    finally:
        agent.save_model(directory=os.path.join('models', version, 'agent'))
    # An episode reward of 1 counts as a win.
    win_count = sum(1 for reward in runner.episode_rewards if reward == 1)
    print('Stats: ')
    print(f'  runner.episode_rewards = {runner.episode_rewards}')
    print(f'  win count = {win_count}')
    try:
        runner.close()
    except AttributeError:
        # Bare raise preserves the original traceback (was `raise e`).
        raise