示例#1
0
 def __init__(self,
              env_id,
              random_side=True,
              agent_list=None,
              rule_agents=None,
              replay_dir=None,
              n_player=4):
     """Build the Pommerman environment and wrap it.

     Args:
         env_id: Environment id handed to ``pommerman.make``; its string
             form is stored under the ``"mode"`` key of the replay data.
         random_side: Stored as ``self._random_side``.
         agent_list: ``None`` to play with ``n_player`` random agents, or
             a comma-separated string with exactly ``n_player`` agent
             specs for ``helpers.make_agent_from_string``.
         rule_agents: Stored as ``self.rule_agents``. Defaults to a fresh
             empty list per instance.
         replay_dir: Stored as ``self._replay_dir``.
         n_player: Number of agents taking part in the game.
     """
     self.n_player = n_player
     self.base_agents = [agents.RandomAgent() for _ in range(n_player)]
     custom_agents = agent_list is not None
     if custom_agents:
         assert isinstance(agent_list, str), \
             "agent_list must be a comma-separated string"
         agent_specs = agent_list.split(',')
         assert len(agent_specs) == n_player, \
             "agent_list must name exactly n_player agents"
         self.agent_list = [
             helpers.make_agent_from_string(spec, index)
             for index, spec in enumerate(agent_specs)
         ]
     else:
         self.agent_list = self.base_agents
     # Make the environment using the agent list
     env = pommerman.make(env_id, self.agent_list)
     if custom_agents:
         # The base agents were not handed to pommerman.make in this
         # branch, so they are initialised explicitly here.
         for id_, agent in enumerate(self.base_agents):
             agent.init_agent(id_, env.spec._kwargs['game_type'])
     super(PommeBase, self).__init__(env)
     # A ``None`` default avoids sharing one list between all instances.
     self.rule_agents = [] if rule_agents is None else rule_agents
     self._random_side = random_side
     self.random_side()
     self._uuid = str(uuid.uuid1())[:8]
     self._replay_dir = replay_dir
     self._replay_data = {"mode": str(env_id)}
def main():
    '''CLI interface to bootstrap training.

    Parses the playground flags, builds one agent per non-empty entry of
    ``--agents`` (agent ids start at 1000) and prints the type of the first
    agent.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
             "configs.py for options.")
    parser.add_argument(
        "--agents",
        # No trailing comma: it would add an empty fifth agent spec.
        default="tensorflow::agents.TensorFlowAgent,test::agents.SimpleAgent,"
                "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
             "locations to run the agents.")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
             "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
             "the game. Doesn't record if None.")
    parser.add_argument(
        "--render",
        default=False,
        action='store_true',
        help="Whether to render or not. Defaults to False.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
             "None.")
    args = parser.parse_args()

    # Ignore blank entries (e.g. from a stray trailing comma) instead of
    # handing an empty spec to the agent factory.
    agent_strings = [
        agent_string for agent_string in args.agents.split(",")
        if agent_string.strip()
    ]
    if not agent_strings:
        parser.error("--agents must name at least one agent")

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(agent_strings)
    ]

    print(type(agents[0]))
示例#3
0
def main():
    '''CLI interface to bootstrap training.

    Builds the agents named by ``--agents``, trains the first TensorForce
    agent among them with a ``Runner``, saves the model to ``--checkpoint``
    and prints a win/lose/draw summary of the episode rewards.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument("--checkpoint",
                        default="models/ppo",
                        help="Directory where checkpoint file stored to.")
    # type=int lets argparse report malformed numbers as usage errors.
    parser.add_argument("--num_of_episodes",
                        default=10,
                        type=int,
                        help="Number of episodes")
    parser.add_argument("--max_timesteps",
                        default=2000,
                        type=int,
                        help="Number of steps")
    args = parser.parse_args()

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    env = make(args.config, agents, args.game_state_file)

    # The first TensorForce agent in the list is the one being trained.
    training_agent = next(
        (agent for agent in agents if isinstance(agent, TensorForceAgent)),
        None)
    if training_agent is None:
        parser.error("--agents must contain a tensorforce agent to train")
    env.set_training_agent(training_agent.agent_id)

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(episodes=args.num_of_episodes,
               max_episode_timesteps=args.max_timesteps)
    print("Stats: ", runner.episode_rewards[-30:], runner.episode_timesteps,
          runner.episode_times)

    agent.save_model(args.checkpoint)

    rewards = runner.episode_rewards
    win = rewards.count(1)
    lose = rewards.count(-1)
    draw = rewards.count(0)
    total = win + lose + draw
    # No reward equal to 1, -1 or 0 (or no episode at all) gives total == 0.
    ratio = round((win / total) * 100.0, 2) if total else 0.0
    print("Results ({}%) = Win({}), Lose({}), Draw({})".format(
        ratio, win, lose, draw))
    try:
        runner.close()
    except AttributeError:
        # Best effort: the failure is deliberately ignored, as before.
        pass
示例#4
0
def run(args, num_times=1, seed=None):
    '''Wrapper to help start the game.

    Args:
        args: Parsed CLI namespace. Reads ``config``, ``record_pngs_dir``,
            ``record_json_dir``, ``game_state_file``, ``render_mode``,
            ``do_sleep``, ``agents`` and ``render``.
        num_times: Number of games to play back to back.
        seed: Seed applied to ``numpy``, ``random`` and the environment. A
            random one is drawn when ``None``.

    Returns:
        A list holding the final ``info`` value of every game played.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    render_mode = args.render_mode
    do_sleep = args.do_sleep

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id)
        for agent_id, agent_string in enumerate(args.agents.split(','))
    ]

    env = make(config, agents, game_state_file, render_mode=render_mode)

    def _run(record_pngs_dir=None, record_json_dir=None):
        '''Runs a game'''
        print("Starting the Game.")
        if record_pngs_dir and not os.path.isdir(record_pngs_dir):
            os.makedirs(record_pngs_dir)
        if record_json_dir and not os.path.isdir(record_json_dir):
            os.makedirs(record_json_dir)

        obs = env.reset()
        done = False

        while not done:
            if args.render:
                env.render(record_pngs_dir=record_pngs_dir,
                           record_json_dir=record_json_dir,
                           do_sleep=do_sleep)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        print("Final Result: ", info)
        if args.render:
            env.render(record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       do_sleep=do_sleep)
            if do_sleep:
                time.sleep(5)
            env.render(close=True)

        if record_json_dir:
            finished_at = datetime.now().isoformat()
            _agents = args.agents.split(',')
            utility.join_json_state(record_json_dir, _agents, finished_at,
                                    config)

        return info

    if seed is None:
        # Integer bound: random.randint rejects the float 1e6 on
        # Python 3.12+ (non-integer arguments raise TypeError).
        seed = random.randint(0, 10**6)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    for i in range(num_times):
        start = time.time()

        # Each game records into its own 1-based sub-directory.
        record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) \
                           if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % (i+1) \
                           if record_json_dir else None
        infos.append(_run(record_pngs_dir_, record_json_dir_))

        print("Game Time: ", time.time() - start)

    atexit.register(env.close)
    return infos
示例#5
0
def run(args, num_times=1, seed=None):
    '''Play ``num_times`` games and append each result to a text file.

    Args:
        args: Parsed CLI namespace. Reads ``config``, ``record_pngs_dir``,
            ``record_json_dir``, ``game_state_file``, ``agents`` and
            ``render``.
        num_times: Number of games to play.
        seed: Ignored. Game ``i`` (0-based) is always seeded with ``i``.

    Returns:
        A list holding the final ``info`` value of every game played.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agent_strings = args.agents.split(',')
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id+1000)
        for agent_id, agent_string in enumerate(agent_strings)
    ]

    # The result file is named after the part following the first dot of
    # the first agent spec. Specs without a dot fall back to the part
    # after "::" instead of raising IndexError.
    first_agent = agent_strings[0]
    if '.' in first_agent:
        result_name = first_agent.split('.')[1] + ".txt"
    else:
        result_name = first_agent.split('::')[-1] + ".txt"

    env = make(config, agents, game_state_file)

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    def _run(seed, record_pngs_dir=None, record_json_dir=None):
        '''Runs one game seeded with ``seed`` and returns its final info.'''
        # NOTE(review): the per-game directory arguments are not used below;
        # rendering records into the top-level args.record_*_dir. Confirm
        # that this is intended.
        env.seed(seed)
        print("Starting the Game.")
        obs = env.reset()
        done = False
        while not done:
            # NOTE(review): this compares against the string "True" while
            # the check after the loop uses truthiness. Confirm how
            # --render is declared before unifying the two.
            if args.render == "True":
                env.render(record_pngs_dir=args.record_pngs_dir,
                           record_json_dir=args.record_json_dir)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        print("Final Result: ", info)
        # Write through a managed handle rather than swapping sys.stdout:
        # the file is closed every game and any redirection the caller set
        # up stays in place.
        with open(result_name, "a") as result_file:
            print(info, file=result_file)
        if args.render:
            env.render(record_pngs_dir=args.record_pngs_dir,
                       record_json_dir=args.record_json_dir,
                       close=True)
        return info

    infos = []
    for i in range(num_times):
        start = time.time()
        seed = i
        np.random.seed(seed)
        random.seed(seed)

        record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % (i+1) if record_json_dir else None
        infos.append(_run(seed, record_pngs_dir_, record_json_dir_))

        print("Game Time: ", time.time() - start)

    atexit.register(env.close)
    return infos
示例#6
0
def run(config,
        agents_list,
        record_pngs_dir = None,
        record_json_dir = None,
        agent_env_vars = "",
        game_state_file = None,
        render_mode = 'human',
        do_sleep = True,
        render = False,
        num_episodes=1,
        seed=None):
    '''Play ``num_episodes`` games and return the final info of each one.

    The agents are built from the comma-separated ``agents_list`` and one
    environment is reused for every episode. ``numpy``, ``random`` and the
    environment are seeded with ``seed``, which is drawn at random when it
    is ``None``. Each episode records into its own 1-based sub-directory of
    ``record_pngs_dir`` / ``record_json_dir`` when those are given.
    '''
    agents = []
    for agent_id, agent_string in enumerate(agents_list.split(',')):
        agents.append(helpers.make_agent_from_string(agent_string, agent_id))

    env = make(config, agents, game_state_file, render_mode=render_mode)

    def _draw(pngs_dir, json_dir):
        '''Renders the current frame.'''
        env.render(
            record_pngs_dir=pngs_dir,
            record_json_dir=json_dir,
            do_sleep=do_sleep)

    def _dump_json_frame(json_dir):
        '''Saves the state as JSON when rendering is switched off.'''
        if render is False and json_dir:
            env.save_json(json_dir)
            time.sleep(1.0 / env._render_fps)

    def _episode_dir(base_dir, episode_number):
        '''Returns the per-episode sub-directory, or None without a base.'''
        if not base_dir:
            return None
        return base_dir + '/%d' % episode_number

    def _run(record_pngs_dir=None, record_json_dir=None):
        '''Runs a game'''
        print("Starting the Game.")
        for target_dir in (record_pngs_dir, record_json_dir):
            if target_dir and not os.path.isdir(target_dir):
                os.makedirs(target_dir)

        obs = env.reset()

        while True:
            if render:
                _draw(record_pngs_dir, record_json_dir)
            _dump_json_frame(record_json_dir)
            obs, reward, done, info = env.step(env.act(obs))
            if done:
                break

        print("Final Result: ", info)
        if render:
            _draw(record_pngs_dir, record_json_dir)
            if do_sleep:
                time.sleep(5)
            env.render(close=True)

        _dump_json_frame(record_json_dir)

        if record_json_dir:
            finished_at = datetime.now().isoformat()
            utility.join_json_state(record_json_dir, agents_list.split(','),
                                    finished_at, config, info)

        return info

    if seed is None:
        # Pick a random seed between 0 and 2^31 - 1
        seed = random.randint(0, np.iinfo(np.int32).max)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    for episode_index in range(num_episodes):
        started_at = time.time()

        episode_number = episode_index + 1
        infos.append(
            _run(_episode_dir(record_pngs_dir, episode_number),
                 _episode_dir(record_json_dir, episode_number)))

        print("Game Time: ", time.time() - started_at)

    atexit.register(env.close)
    return infos
示例#7
0
def main():
    '''CLI interface to bootstrap training.

    Trains the first TensorForce agent named by ``--agents`` with a
    ``ParallelRunner`` over ``--numprocs`` wrapped environments, appends
    the run statistics to ``saved_models/<modelname>-history.pkl``, saves
    the model and plots the average reward per batch of episodes.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default='PommeFFACompetition-v0',
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options. default is 1v1")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")

    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument(
        '--batch-size',  # This doesn't change batch-size in tensorforce_agent.py
        default=10,
        type=int,
        help='average reward visualization by batch size. default=10 episodes'
    )
    parser.add_argument(
        '--episodes',
        default=10,
        type=int,
        help=
        'number of training episodes, default=10. must be divisible by batch_size'
    )
    parser.add_argument(
        '--modelname',
        default='default',
        help=
        'name of model file savename, timesteps wil be appended. default= default'
    )
    parser.add_argument('--loadfile',
                        default=None,
                        help='name of model you want to load')
    parser.add_argument('--numprocs',
                        default=12,
                        type=int,
                        help='num parallel processes. default=12')
    args = parser.parse_args()

    config = args.config
    game_state_file = args.game_state_file
    num_procs = args.numprocs

    #variables
    save_path = 'saved_models/'
    model_name = args.modelname
    batch_size = args.batch_size
    num_episodes = args.episodes
    # Report bad values as usage errors. An assert would be stripped by -O
    # and a zero batch size would raise ZeroDivisionError.
    if batch_size <= 0:
        parser.error("--batch-size must be a positive integer")
    if num_episodes % batch_size != 0:
        parser.error("--episodes must be divisible by --batch-size")

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]
    env = make(config, agents, game_state_file)

    # The first TensorForce agent in the list is the one being trained.
    training_agent = next(
        (agent for agent in agents if isinstance(agent, TensorForceAgent)),
        None)
    if training_agent is None:
        parser.error("--agents must contain a tensorforce agent to train")
    env.set_training_agent(training_agent.agent_id)

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    agent = training_agent.initialize(env, num_procs)

    # USHA Model should load automatically as saver is provided.
    if args.loadfile:
        agent.restore(directory=save_path, filename=args.loadfile)

    atexit.register(functools.partial(clean_up_agents, agents))

    # Every wrapper shares the same underlying environment object.
    wrapped_envs = [
        WrappedEnv(env, visualize=args.render) for _ in range(num_procs)
    ]

    runner_time = timeit.default_timer()

    # Load the statistics of earlier runs when resuming from a saved model.
    history_path = save_path + model_name + '-history.pkl'
    history = None
    if args.loadfile:
        try:
            # NOTE: pickle.load executes arbitrary code from the file; only
            # load history files you created yourself.
            with open(history_path, 'rb') as handle:
                history = pickle.load(handle)
        except Exception as err:
            # Best effort: a missing or unreadable history starts a new one.
            print('Could not load history from {}: {}'.format(
                history_path, err))

    runner = ParallelRunner(agent=agent, environments=wrapped_envs)

    # The runner trains up to a global episode count: after 100 episodes,
    # num_episodes must be 200 to train another 100.
    num_episodes += runner.global_episodes

    runner.run(num_episodes=num_episodes, max_episode_timesteps=2000)

    print(runner.episode_rewards)

    if history:
        history['episode_rewards'].extend(runner.episode_rewards)
        history['episode_timesteps'].extend(runner.episode_timesteps)
        history['episode_seconds'].extend(runner.episode_seconds)
        history['episode_agent_seconds'].extend(runner.episode_agent_seconds)
    else:
        history = {
            'episode_rewards': runner.episode_rewards,
            'episode_timesteps': runner.episode_timesteps,
            'episode_seconds': runner.episode_seconds,
            'episode_agent_seconds': runner.episode_agent_seconds,
        }

    with open(history_path, 'wb') as handle:
        pickle.dump(history, handle)
    # USHA Model should save automatically as saver is provided.
    agent.save(directory=save_path,
               filename=model_name + str(runner.global_episodes),
               append_timestep=False)
    print('Runner time: ', timeit.default_timer() - runner_time)

    # Average over complete batches only: a resumed history may not be a
    # multiple of the current batch size, which used to break the reshape.
    rewards = np.asarray(history['episode_rewards'])
    num_batches = len(rewards) // batch_size
    batch_means = np.mean(
        rewards[:num_batches * batch_size].reshape(num_batches, batch_size),
        axis=1)
    plt.plot(np.arange(0, num_batches), batch_means)
    plt.title('average rewards per batch of episodes')
    plt.ylabel('average reward')
    plt.xlabel('batch of ' + str(batch_size) + ' episodes')
    plt.show()

    try:
        runner.close()
    except AttributeError:
        # Best effort: the failure is deliberately ignored, as before.
        pass
示例#8
0
def main():
    '''CLI interface to bootstrap testing.

    Loads a saved PPO2 model, plays ``testing_episodes`` games with it in
    the wrapped environment, prints the number of won episodes and the
    average episode length, then exits the process.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo2,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    parser.add_argument(
        "--agent_env_vars",
        help="Comma delineated list of agent environment vars "
        "to pass to Docker. This is only for the Docker Agent."
        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
        "would send two arguments to Docker Agent 0 and one to"
        " Docker Agent 3.",
        default="")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    # With default=True and store_true the value is always True, so the
    # help text now says so instead of claiming a False default.
    parser.add_argument(
        "--render",
        default=True,
        action='store_true',
        help="Whether to render or not. Defaults to True.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    args = parser.parse_args()
    print(args)
    config = args.config
    game_state_file = args.game_state_file

    agents_string = args.agents.split(",")

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(agents_string)
    ]

    env = make(config, agents, game_state_file)

    # The first TensorForce/Baseline agent is the one driven by the model.
    for agent in agents:
        if isinstance(agent, (agnts.TensorForceAgent, agnts.BaselineAgent)):
            env.set_training_agent(agent.agent_id)
            break

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    wrapped_env = WrappedEnvBaselines(env, visualize=args.render)
    atexit.register(functools.partial(clean_up_agents, agents))

    import sys
    from stable_baselines import PPO2
    print(
        "[INFO] Loading PPO model"
    )
    agent = PPO2.load("pommerman/cli/models/1559359177.602215")

    test_finished_episodes = 0
    test_won_episodes = 0
    test_total_timesteps = 0
    actions_freq = [0, 0, 0, 0, 0, 0]

    # NOTE(review): testing_episodes is not defined in this function;
    # presumably a module-level constant. Confirm.
    print(
        "[INFO] Starting testing for {test_length} games".format(test_length=testing_episodes)
    )

    observed_state = wrapped_env.reset()
    while test_finished_episodes < testing_episodes:

        test_total_timesteps += 1
        action, _states = agent.predict(observed_state)
        actions_freq[action] += 1
        observed_state, reward, episode_finished, info = wrapped_env.step(action)
        if test_total_timesteps == 1:
            time.sleep(1)
        # Switch visualisation on for the last five episodes.
        if testing_episodes - test_finished_episodes <= 5:
            wrapped_env.visualize = True

        if episode_finished:
            test_finished_episodes += 1
            print("[LOG] Last episode reward: " + str(reward))
            if reward == 1:
                test_won_episodes += 1
            observed_state = wrapped_env.reset()
            time.sleep(1)

    print(
        "[INFO] Won episodes/total episodes: {rews}/{test_ep}".format(test_ep=testing_episodes, rews=test_won_episodes)
    )
    # Guard the average: with zero episodes the loop never runs and the
    # division would raise ZeroDivisionError.
    if testing_episodes:
        average_length = float(test_total_timesteps) / testing_episodes
    else:
        average_length = 0.0
    print(
        "[INFO] Average episode length: {times}".format(times=average_length)
    )
    # sys.exit instead of the site-provided exit(), which is not always
    # available (e.g. under `python -S`).
    sys.exit()
def main():
    '''CLI interface to bootstrap training.

    Builds the agents named by ``--agents``, trains the first PPO agent
    among them with a ``Runner`` using the reward shaping from
    ``--rewards``, saves the model to ``--checkpoint`` and prints the mean
    episode reward.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument("--checkpoint",
                        default="models/ppo",
                        help="Directory where checkpoint file stored to.")
    # type=int lets argparse report malformed numbers as usage errors.
    parser.add_argument("--num_of_episodes",
                        default=10,
                        type=int,
                        help="Number of episodes")
    parser.add_argument("--max_timesteps",
                        default=2000,
                        type=int,
                        help="Number of steps")
    parser.add_argument("--rewards",
                        default=DEFAULT_REWARDS,
                        help="Shaping of rewards")
    args = parser.parse_args()

    config = args.config
    game_state_file = args.game_state_file
    checkpoint = args.checkpoint
    num_of_episodes = args.num_of_episodes
    max_timesteps = args.max_timesteps
    custom_rewards = args.rewards

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        create_ppo_agent(
            helpers.make_agent_from_string(agent_string, agent_id + 1000))
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    env = make(config, agents, game_state_file)
    training_agent = None
    training_agent_id = None

    # The first PPO agent becomes the training agent. Agents listed before
    # it are only printed.
    for agent in agents:
        if isinstance(agent, TensorForcePpoAgent):
            print("Ppo agent initiazlied : {}, {}".format(agent, type(agent)))
            training_agent = agent
            env.set_training_agent(agent.agent_id)
            training_agent_id = agent.agent_id
            break
        print("[{}] : id[{}]".format(agent, agent.agent_id))

    if training_agent is None:
        parser.error("--agents must contain a PPO agent to train")

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    # Initialise every PPO agent exactly once. The training agent used to
    # be initialised twice, with the first result thrown away.
    learning_agent = None
    for agent in agents:
        if isinstance(agent, TensorForcePpoAgent):
            initialized = agent.initialize(env)
            if agent.agent_id == training_agent_id:
                learning_agent = initialized

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    wrapped_env.set_render(args.render)
    wrapped_env.set_rewards(custom_rewards)

    runner = Runner(agent=learning_agent, environment=wrapped_env)
    runner.run(episodes=num_of_episodes, max_episode_timesteps=max_timesteps)
    print("Stats: ", runner.episode_rewards[-30:], runner.episode_timesteps,
          runner.episode_times)

    learning_agent.save_model(checkpoint)

    rewards = runner.episode_rewards
    import numpy as np
    mean = np.mean(rewards)
    print('last 30 rewards {}'.format(rewards[-30:]))
    print('mean of rewards {}'.format(mean))

    try:
        runner.close()
    except AttributeError as e:
        # Best effort: the failure is reported but not fatal.
        print(e)
示例#10
0
def run(args, num_times=1, seed=None):
    '''Wrapper to help start the game.

    Builds one agent per entry of ``args.agents``, creates the environment,
    restores a saved model for every agent whose string starts with
    ``tensorforce::`` and then plays ``num_times`` games.

    Args:
        args: Parsed CLI namespace. Reads ``config``, ``agents``,
            ``record_pngs_dir``, ``record_json_dir``, ``game_state_file``,
            ``render_mode``, ``do_sleep`` and ``render``.
        num_times: Number of games to play.
        seed: Seed applied to ``numpy``, ``random`` and the environment.
            A random one is drawn when it is ``None``.

    Returns:
        A list with the ``info`` value returned by the last step of each
        game, in playing order.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    render_mode = args.render_mode
    do_sleep = args.do_sleep

    # Split once: the same strings drive agent creation, model restoring
    # and the JSON summary written at the end of a game.
    agent_strings = args.agents.split(',')
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id)
        for agent_id, agent_string in enumerate(agent_strings)
    ]

    env = make(config, agents, game_state_file, render_mode=render_mode)
    # Register the cleanup right away so the environment is closed at
    # interpreter exit even when one of the games below raises.
    atexit.register(env.close)

    for agent_string, agent in zip(agent_strings, agents):
        if agent_string.split('::')[0] == "tensorforce":
            print("run_buttle[51] ", type(agent))
            restored = agent.initialize(env)
            # NOTE(review): the checkpoint directory is hard-coded and
            # relative to the current working directory.
            restored.restore_model(directory="./pommerman/cli/saved_win/")

    def _run(record_pngs_dir=None, record_json_dir=None):
        '''Runs a game and returns the ``info`` of its final step.'''
        print("Starting the Game.")
        if record_pngs_dir and not os.path.isdir(record_pngs_dir):
            os.makedirs(record_pngs_dir)
        if record_json_dir and not os.path.isdir(record_json_dir):
            os.makedirs(record_json_dir)

        obs = env.reset()
        done = False

        while not done:
            if args.render:
                env.render(record_pngs_dir=record_pngs_dir,
                           record_json_dir=record_json_dir,
                           do_sleep=do_sleep)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        print("Final Result: ", info)
        if args.render:
            # Render the final state once more before closing the viewer.
            env.render(record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       do_sleep=do_sleep)
            if do_sleep:
                time.sleep(5)
            env.render(close=True)

        if record_json_dir:
            finished_at = datetime.now().isoformat()
            utility.join_json_state(record_json_dir, list(agent_strings),
                                    finished_at, config, info)

        return info

    if seed is None:
        # Pick a random seed between 0 and 2^31 - 1
        seed = random.randint(0, np.iinfo(np.int32).max)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    for game_number in range(1, num_times + 1):
        start = time.time()

        # Every game records into its own numbered sub-directory.
        record_pngs_dir_ = record_pngs_dir + '/%d' % game_number \
                           if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % game_number \
                           if record_json_dir else None
        infos.append(_run(record_pngs_dir_, record_json_dir_))

        print("Game Time: ", time.time() - start)

    return infos
示例#11
0
def run(args, num_times=1, seed=None):
    '''Play ``num_times`` games with the agents described by ``args``.

    Args:
        args: Parsed CLI namespace. Reads ``config``, ``agents``,
            ``record_pngs_dir``, ``record_json_dir``, ``game_state_file``,
            ``render_mode`` and ``render``.
        num_times: Number of games to play.
        seed: Seed applied to ``numpy``, ``random`` and the environment.
            A random one is drawn when it is ``None``.

    Returns:
        A list with the ``info`` value returned by the last step of each
        game, in playing order.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    render_mode = args.render_mode

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id+1000)
        for agent_id, agent_string in enumerate(args.agents.split(','))
    ]

    env = make(config, agents, game_state_file, render_mode=render_mode)
    # Register the cleanup right away so the environment is closed at
    # interpreter exit even when one of the games below raises.
    atexit.register(env.close)

    if record_pngs_dir and not os.path.isdir(record_pngs_dir):
        os.makedirs(record_pngs_dir)
    if record_json_dir and not os.path.isdir(record_json_dir):
        os.makedirs(record_json_dir)

    def _run(record_pngs_dir=None, record_json_dir=None):
        '''Plays a single game and returns the ``info`` of its final step.

        The directories passed in are the per-game recording targets; they
        are created here because nothing else creates them.
        '''
        print("Starting the Game.")
        if record_pngs_dir and not os.path.isdir(record_pngs_dir):
            os.makedirs(record_pngs_dir)
        if record_json_dir and not os.path.isdir(record_json_dir):
            os.makedirs(record_json_dir)

        obs = env.reset()
        done = False
        while not done:
            if args.render:
                env.render(record_pngs_dir=record_pngs_dir,
                           record_json_dir=record_json_dir,
                           mode=render_mode)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        # Hand every agent the reward of the final step.
        for agent in agents:
            agent.episode_end(reward[agent.agent_id])

        print("Final Result: ", info)
        if args.render:
            # Render the final state once more before closing the viewer.
            env.render(record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       mode=render_mode)
            time.sleep(5)
            env.render(close=True)
        return info

    if seed is None:
        # randint needs integer bounds; a float such as 1e6 is rejected by
        # recent Python versions.
        seed = random.randint(0, 10**6)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    for game_number in range(1, num_times + 1):
        start = time.time()

        # Every game records into its own numbered sub-directory.
        record_pngs_dir_ = record_pngs_dir + '/%d' % game_number \
                           if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % game_number \
                           if record_json_dir else None
        infos.append(_run(record_pngs_dir_, record_json_dir_))

        print("Game Time: ", time.time() - start)

    return infos
def main():
    '''CLI interface to bootstrap training.

    Parses the command line, builds the agents and the environment,
    restores the training agent's model from a fixed checkpoint directory
    and runs 10 episodes through a ``Runner``. The model is saved from the
    ``episode_finished`` callback.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    args = parser.parse_args()

    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    # The first TensorForceAgent in the list is the one being trained.
    # Fail with a usage error instead of crashing on ``None`` further down.
    training_agent = next(
        (agent for agent in agents if isinstance(agent, TensorForceAgent)),
        None)
    if training_agent is None:
        parser.error("--agents does not contain a TensorForceAgent to train.")

    env = make(config, agents, game_state_file)
    env.set_training_agent(training_agent.agent_id)

    # os.makedirs raises FileExistsError for an existing path, so recordings
    # of an earlier run are never overwritten. Unlike an ``assert`` this
    # check is not stripped under ``python -O``.
    if record_pngs_dir:
        os.makedirs(record_pngs_dir)
    if record_json_dir:
        os.makedirs(record_json_dir)

    def episode_finished(r):
        '''Runner callback: log progress and checkpoint the model.

        Logs every 100th episode. Saves to ``./clone_saved_win/`` whenever
        the last episode reward is non-negative, otherwise to
        ``./saved_played/`` every 1000th episode. Always returns True
        (presumably telling the Runner to continue; confirm against the
        Runner API).
        '''
        last_reward = r.episode_rewards[-1]
        if r.episode % 100 == 0:
            print(
                "Finished episode {ep} after {ts} timesteps (reward: {reward})"
                .format(ep=r.episode,
                        ts=r.episode_timestep,
                        reward=last_reward))
        if last_reward >= 0:
            r.agent.save_model(directory="./clone_saved_win/")
        elif r.episode % 1000 == 0:
            r.agent.save_model(directory="./saved_played/")
        return True

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)
    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    # NOTE(review): machine-specific absolute path; it should become a CLI
    # option before this script is run anywhere else.
    agent.restore_model(
        directory=
        "/home/rishchen/Source/Work/playground/pommerman/cli/clone_saved_win/")
    runner.run(episodes=10,
               max_episode_timesteps=20000,
               episode_finished=episode_finished)
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_times)

    try:
        runner.close()
    except AttributeError:
        # Best effort: tolerate a Runner that has no ``close`` method.
        pass
def main():
    '''CLI interface to bootstrap training.

    Parses the command line, builds the agents and the environment,
    restores previously saved parameters when ``../params/<params_dir>``
    exists, runs 1000 episodes through a ``Runner`` (in testing mode when
    ``--testing`` is given) and saves the parameters afterwards.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFANHWC-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument(
        "--params_dir",
        default="ppo",
        help="Directory (below ../params) in which to save the params and "
        "from which to load them. Defaults to 'ppo'.")
    parser.add_argument(
        "--testing",
        default=False,
        action='store_true',
        help=
        "Test mode for the trained/training agent or not. Defaults to False (i.e. training mode)."
    )
    parser.add_argument(
        "--lstm",
        default=False,
        action='store_true',
        help="Whether to add an (internal) LSTM layer to the model.")
    args = parser.parse_args()

    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    params_dir = "../params/{}".format(args.params_dir)

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    # The first TensorForceAgent in the list is the one being trained.
    # Fail with a usage error instead of crashing on ``None`` further down.
    training_agent = next(
        (agent for agent in agents if isinstance(agent, TensorForceAgent)),
        None)
    if training_agent is None:
        parser.error("--agents does not contain a TensorForceAgent to train.")

    env = make(config, agents, game_state_file)
    env.set_training_agent(training_agent.agent_id)

    # os.makedirs raises FileExistsError for an existing path, so recordings
    # of an earlier run are never overwritten. Unlike an ``assert`` this
    # check is not stripped under ``python -O``.
    if record_pngs_dir:
        os.makedirs(record_pngs_dir)
    if record_json_dir:
        os.makedirs(record_json_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env, lstm=args.lstm)
    if os.path.isdir(params_dir):
        # Continue from the parameters of an earlier run.
        agent.restore_model(params_dir)
    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(num_episodes=1000,
               max_episode_timesteps=2000,
               testing=args.testing)
    # On a first run the directory does not exist yet (the restore above was
    # skipped for that reason), so create it before saving into it.
    os.makedirs(params_dir, exist_ok=True)
    agent.save_model("{}/params".format(params_dir))
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_times)

    try:
        runner.close()
    except AttributeError:
        # Best effort: tolerate a Runner that has no ``close`` method.
        pass
def main():
    '''CLI interface to bootstrap training.

    Parses the command line, builds the agents and the environment, loads
    the saved model and its history from ``./saved_models``, trains for
    1000 episodes through a ``ParallelRunner`` over ``--num_procs`` wrapped
    environments, saves the model together with the extended history and
    plots the episode rewards.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument(
        "--num_procs",
        default=12,
        type=int,
        help="Number of parallel threads to run. Defaults to 12.")
    args = parser.parse_args()

    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    num_procs = args.num_procs

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    # The first TensorForceAgent in the list is the one being trained.
    # Fail with a usage error instead of crashing on ``None`` further down.
    training_agent = next(
        (agent for agent in agents if isinstance(agent, TensorForceAgent)),
        None)
    if training_agent is None:
        parser.error("--agents does not contain a TensorForceAgent to train.")

    env = make(config, agents, game_state_file)
    env.set_training_agent(training_agent.agent_id)

    # os.makedirs raises FileExistsError for an existing path, so recordings
    # of an earlier run are never overwritten. Unlike an ``assert`` this
    # check is not stripped under ``python -O``.
    if record_pngs_dir:
        os.makedirs(record_pngs_dir)
    if record_json_dir:
        os.makedirs(record_json_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env,
                                      num_procs,
                                      summarizer={
                                          'directory': 'tensorforce_agent',
                                          'labels': 'all'
                                      },
                                      saver={
                                          'directory': './saved_models',
                                          'filename': 'ppo'
                                      })

    # NOTE(review): assumes load_model returns an object exposing the lists
    # ``episode_rewards``, ``episode_timesteps`` and ``episode_seconds``;
    # confirm against its definition.
    hist = load_model(agent, './saved_models')

    atexit.register(functools.partial(clean_up_agents, agents))

    # NOTE(review): every wrapper shares the same underlying ``env``; verify
    # that this is what the parallel runner expects.
    wrapped_envs = [
        WrappedEnv(env, visualize=args.render) for _ in range(num_procs)
    ]

    runner_time = timeit.default_timer()

    # A single training round; raise the count to train in several rounds.
    for _ in range(1):
        runner = ParallelRunner(agent=agent, environments=wrapped_envs)
        runner.run(num_episodes=1000, max_episode_timesteps=2000)

        print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
              runner.episode_seconds)
        # ``list.extend`` works in place and returns None, so the history
        # is extended here and the dict to save is built afterwards.
        hist.episode_rewards.extend(runner.episode_rewards)
        hist.episode_timesteps.extend(runner.episode_timesteps)
        hist.episode_seconds.extend(runner.episode_seconds)

    print('Runner time: ', timeit.default_timer() - runner_time)

    history = {
        "episode_rewards": hist.episode_rewards,
        "episode_timesteps": hist.episode_timesteps,
        "episode_times": hist.episode_seconds,
    }
    # os.path.join yields the original 'saved_models\\ppo' on Windows and a
    # valid path on every other platform.
    save_model(agent, os.path.join('saved_models', 'ppo'), history, True)

    plt.plot(runner.episode_rewards)
    plt.show()
    try:
        runner.close()
    except AttributeError:
        # Best effort: tolerate a runner that has no ``close`` method.
        pass