Example #1
import pommerman
from pommerman import agents


def setUp(self):
    # Four StaticAgents fill the team slots; a Docker agent could be
    # swapped into one slot instead.
    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    # Restore the environment from a recorded game state and render it.
    self.env = pommerman.make("PommeTeam-v0", agent_list,
                              game_state_file='record/json/000.json')
    self.env.reset()
    self.env.render()
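Only setUp is shown in this example; a minimal sketch of a test that could build on it, assuming the standard pommerman reset/act/step loop (the method name and assertion are illustrative, not part of the original snippet):

def test_restored_state_steps(self):
    # Advance the restored game by one step using the agents' own actions.
    obs = self.env.reset()
    actions = self.env.act(obs)
    obs, reward, done, info = self.env.step(actions)
    self.assertEqual(len(actions), 4)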
Example #2
import pommerman
from gym import spaces
from pommerman import agents


def __init__(self, config):
    # The four slots are placeholders; the trained policies act through RLlib.
    self.agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
    ]
    self.env = pommerman.make(config["env_id"], self.agent_list,
                              config["game_state_file"])
    self.is_render = config["render"]
    self.action_space = self.env.action_space
    self.prev_obs = None
    self.stat = None
    self.reset_stat()
    # Featurized observations: 17 planes over the 11x11 board.
    self.observation_space = spaces.Box(low=0, high=20, shape=(17, 11, 11))
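The wrapper declares a Box observation space of shape (17, 11, 11), but the featurize helper used throughout these examples is never shown. A minimal sketch of such a helper, assuming 14 one-hot planes for the board item types plus bomb blast strength, bomb life, and the agent's ammo (the project's actual plane layout may differ):

import numpy as np


def featurize(obs):
    """Stack a per-agent Pommerman observation into a (17, 11, 11) array."""
    board = obs["board"]  # 11x11 integer board
    planes = []

    # One one-hot plane per board item type (passage, wall, wood, bombs, agents, ...).
    for item in range(14):
        planes.append((board == item).astype(np.float32))

    # Scalar boards: bomb blast strength, bomb life, and the agent's own
    # ammo broadcast over the whole board.
    planes.append(obs["bomb_blast_strength"].astype(np.float32))
    planes.append(obs["bomb_life"].astype(np.float32))
    planes.append(np.full_like(board, obs["ammo"], dtype=np.float32))

    return np.stack(planes)  # shape (17, 11, 11)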
Example #3
    def reset(self):
        # Reset the underlying Pommerman env and return featurized
        # observations only for agents that are still alive.
        self.prev_obs = self.env.reset()
        obs = {}
        self.reset_stat()
        for i in range(4):
            if self.is_agent_alive(i):
                obs[i] = featurize(self.prev_obs[i])

        return obs


if __name__ == '__main__':
    agent_list = [
        agents.RandomAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    env = pommerman.make(
        'PommeTeam-v0',
        agent_list,
        # '/home/lucius/working/projects/pomme_rllib/resources/one_line_state.json'
    )
    obs = env.reset()

    while True:
        # Print the 17 featurized planes of agent 0's observation.
        features = featurize(obs[0])
        for i in range(17):
            print(features[i])
        print()
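Besides __init__ and reset, the RLlib wrapper also needs a step method. The sketch below assumes the MultiAgentEnv dict convention: scripted agents act through env.act, trained agents' actions arrive in action_dict, and dead agents are dropped from the returned dicts (an illustration, not the project's actual implementation):

    def step(self, action_dict):
        # Ask the placeholder agents for actions, then overwrite the
        # slots that are controlled by trained policies.
        actions = self.env.act(self.prev_obs)
        for agent_id, action in action_dict.items():
            actions[agent_id] = action

        new_obs, rewards, done, info = self.env.step(actions)

        obs, reward = {}, {}
        dones = {"__all__": done}
        for i in range(4):
            if self.is_agent_alive(i):
                obs[i] = featurize(new_obs[i])
                reward[i] = rewards[i]

        self.prev_obs = new_obs
        return obs, reward, dones, {}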
Example #4
        "policies_to_train": ["policy_0"],
    },
    "observation_filter": "MeanStdFilter",
    "use_pytorch": True
},
                       env=v0.RllibPomme)

# fdb733b6
checkpoint = 600
checkpoint_dir = "/home/lucius/ray_results/two_policies_vs_static_agents/PPO_RllibPomme_0_2020-06-09_23-39-347whmqdrs"
ppo_agent.restore("{}/checkpoint_{}/checkpoint-{}".format(
    checkpoint_dir, checkpoint, checkpoint))

# All four seats start as StaticAgents; the trained policy overrides
# seats 0 and 2 in the rollout loop below.
agent_list = []
for agent_id in range(4):
    agent_list.append(agents.StaticAgent())
env = pommerman.make("PommeTeam-v0", agent_list=agent_list)

# Play a single rendered episode, querying the restored PPO policy
# for the actions of agents 0 and 2.
for i in range(1):
    obs = env.reset()

    done = False
    while not done:
        env.render()
        actions = env.act(obs)
        actions[0] = ppo_agent.compute_action(observation=featurize(obs[0]),
                                              policy_id="policy_0")
        actions[2] = ppo_agent.compute_action(observation=featurize(obs[2]),
                                              policy_id="policy_0")
        obs, reward, done, info = env.step(actions)
        print("reward:", reward)
training_agent_filename = "TEAM_ALT_AGENT"  # file name

training_agent = agents.bilal_ccritic_bignnAgent()    # teammate 1
training_agent_2 = agents.bilal_ccritic_bignnAgent()  # teammate 2

num_games_per_opponents = 100  # how many games per set of opponents

shuffle = True
opponents_filename = "Sta_RndNB_RulRnd_RulRndNB_Simple_SimpleNB"
opponent_lists = 6
agentsOpp = [[0] for i in range(opponent_lists)]

# Lists of agents to test; for now, our agents are assumed to be in positions 0 and 2.
agentsOpp[0] = [
    training_agent,
    agents.StaticAgent(), training_agent_2,
    agents.StaticAgent()
]
agentsOpp[1] = [
    training_agent,
    agents.RandomAgentNoBombs(), training_agent_2,
    agents.RandomAgentNoBombs()
]
agentsOpp[2] = [
    training_agent,
    agents.RulesRandomAgent(), training_agent_2,
    agents.RulesRandomAgent()
]
agentsOpp[3] = [
    training_agent,
    agents.RulesRandomAgentNoBomb(), training_agent_2,