def setUp(self):
    """Build a PommeTeam env with four static agents from a recorded game state.

    Resets and renders once so each test starts from the recorded position.
    """
    # Four placeholder agents; a Docker-hosted agent could occupy one slot instead.
    roster = [agents.StaticAgent() for _ in range(4)]
    self.env = pommerman.make(
        "PommeTeam-v0",
        roster,
        game_state_file='record/json/000.json',
    )
    self.env.reset()
    self.env.render()
def __init__(self, config):
    """Wrap a Pommerman environment for RLlib.

    Reads from *config*: "env_id", "game_state_file", and "render".
    """
    self.agent_list = [agents.StaticAgent() for _ in range(4)]
    self.env = pommerman.make(
        config["env_id"],
        self.agent_list,
        config["game_state_file"],
    )
    self.is_render = config["render"]
    self.action_space = self.env.action_space
    self.prev_obs = None
    self.stat = None
    self.reset_stat()  # initialise per-episode statistics
    # 17 feature planes over the 11x11 board, cell values bounded by 20.
    self.observation_space = spaces.Box(low=0, high=20, shape=(17, 11, 11))
def reset(self):
    """Reset the wrapped env and return {agent_id: features} for living agents."""
    self.prev_obs = self.env.reset()
    self.reset_stat()
    return {
        agent_id: featurize(self.prev_obs[agent_id])
        for agent_id in range(4)
        if self.is_agent_alive(agent_id)
    }


if __name__ == '__main__':
    roster = [
        agents.RandomAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
    ]
    env = pommerman.make('PommeTeam-v0', roster)
    obs = env.reset()
    # Debug loop: print the 17 feature planes of agent 0's observation.
    # NOTE(review): the env is never stepped, so the same observation is
    # printed forever — presumably intentional for feature inspection; confirm.
    while True:
        features = featurize(obs[0])
        for plane in range(17):
            print(features[plane])
        print()
"policies_to_train": ["policy_0"], }, "observation_filter": "MeanStdFilter", "use_pytorch": True }, env=v0.RllibPomme) # fdb733b6 checkpoint = 600 checkpoint_dir = "/home/lucius/ray_results/two_policies_vs_static_agents/PPO_RllibPomme_0_2020-06-09_23-39-347whmqdrs" ppo_agent.restore("{}/checkpoint_{}/checkpoint-{}".format( checkpoint_dir, checkpoint, checkpoint)) agent_list = [] for agent_id in range(4): agent_list.append(agents.StaticAgent()) env = pommerman.make("PommeTeam-v0", agent_list=agent_list) for i in range(1): obs = env.reset() done = False while not done: env.render() actions = env.act(obs) actions[0] = ppo_agent.compute_action(observation=featurize(obs[0]), policy_id="policy_0") actions[2] = ppo_agent.compute_action(observation=featurize(obs[2]), policy_id="policy_0") obs, reward, done, info = env.step(actions) print("reward:", reward)
training_agent_filename = "TEAM_ALT_AGENT" ##File name training_agent = agents.bilal_ccritic_bignnAgent() #Teammate1 training_agent_2 = agents.bilal_ccritic_bignnAgent() #Teammate2 num_games_per_opponents = 100 #How many games per set of opponents shuffle = True opponents_filename = "Sta_RndNB_RulRnd_RulRndNB_Simple_SimpleNB" opponent_lists = 6 agentsOpp = [[0] for i in range(opponent_lists)] agentsOpp[ 0] = [ ## Lists of agents to test, for now, assuming our agents are positions 0 and 2 training_agent, agents.StaticAgent(), training_agent_2, agents.StaticAgent() ] agentsOpp[1] = [ training_agent, agents.RandomAgentNoBombs(), training_agent_2, agents.RandomAgentNoBombs() ] agentsOpp[2] = [ training_agent, agents.RulesRandomAgent(), training_agent_2, agents.RulesRandomAgent() ] agentsOpp[3] = [ training_agent, agents.RulesRandomAgentNoBomb(), training_agent_2,