示例#1
0
def test_multiple_num_agents(local_mode=False):
    """Smoke-test CEPPOTrainer across a grid of agent counts (2, 3, 4).

    Launches a short, CPU-only tune experiment on BipedalWalker-v2 and
    returns the ``tune.run`` analysis object.
    """
    gpu_count = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=gpu_count)
    # Sweep the number of agents via tune's grid search.
    agent_grid = tune.grid_search([2, 3, 4])
    test_config = _get_default_test_config(
        agent_grid, "BipedalWalker-v2", gpu_count
    )
    # Keep the run short: stop after 5000 environment timesteps.
    return tune.run(
        CEPPOTrainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop={"timesteps_total": 5000},
        config=test_config
    )
示例#2
0
def test_cetd3(local_mode=False):
    """Smoke-test CETD3Trainer on BipedalWalker-v2 with 3 agents.

    Strips PPO-only SGD keys from the default test config, sets short TD3
    warm-up horizons, and runs a brief CPU-only tune experiment.

    Returns the ``tune.run`` analysis object (for parity with the other
    test helpers in this file).
    """
    num_gpus = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)
    config = _get_default_test_config(
        num_agents=3, env_name="BipedalWalker-v2", num_gpus=num_gpus
    )
    # PPO-specific keys are meaningless for TD3; drop them if present.
    # (Previously "sgd_minibatch_size" was popped unconditionally, which
    # raised KeyError whenever the default config did not contain it.)
    config.pop("num_sgd_iter", None)
    config.pop("sgd_minibatch_size", None)
    # Short TD3 warm-up so the 2000-timestep run actually trains a little.
    config['timesteps_per_iteration'] = 80
    config['pure_exploration_steps'] = 80
    config['learning_starts'] = 180
    return tune.run(
        CETD3Trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop={"timesteps_total": 2000},
        config=config
    )
示例#3
0
def _base(
        trainer,
        local_mode=False,
        extra_config=None,
        t=500,
        env_name="BipedalWalker-v2",
        num_agents=3
):
    """Shared driver for short trainer smoke tests.

    Builds the default test config for ``trainer``, overlays
    ``extra_config`` on top of it, and launches a brief CPU-only tune run.
    ``t`` is either a timestep budget (int) or a complete tune stop dict.
    Returns the ``tune.run`` analysis object.
    """
    gpu_count = 0

    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=gpu_count)

    run_config = _get_default_test_config(num_agents, env_name, gpu_count)
    if extra_config:
        run_config.update(extra_config)

    # An int is shorthand for the usual timestep-budget stop criterion;
    # a dict is passed through to tune unchanged.
    if isinstance(t, dict):
        stop = t
    else:
        stop = {"timesteps_total": t}

    return tune.run(
        trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop=stop,
        config=run_config
    )
示例#4
0
        from ray.tune.registry import register_env


        def make_pybullet(_=None):
            import pybullet_envs
            import gym
            print("Successfully import pybullet and found: ",
                  pybullet_envs.getList())
            return gym.make(env_name)


        register_env(env_name, make_pybullet)

    analysis = tune.run(
        "PPO",
        local_dir=get_local_dir(),
        name=exp_name,
        checkpoint_freq=10,
        keep_checkpoints_num=10,
        checkpoint_score_attr="episode_reward_mean",
        checkpoint_at_end=True,
        stop={"info/num_steps_sampled": stop}
        if isinstance(stop, int) else stop,
        config=walker_config,
        max_failures=20,
        reuse_actors=False
    )

    path = "{}-{}-{}ts.pkl".format(
        exp_name, env_name, stop
    )
示例#5
0
def train(
        extra_config,
        trainer,
        env_name,
        stop,
        exp_name,
        num_agents,
        num_seeds,
        num_gpus,
        num_cpus=None,
        test_mode=False,
        address=None,
        redis_password=None,
        clip_memory=False,
        init_memory=None,
        init_object_store_memory=None,
        init_redis_max_memory=None,
        **kwargs
):
    """Launch a multi-agent tune experiment and pickle its trial dataframes.

    Initializes Ray (optionally joining an existing cluster via
    ``address``), builds a seeded multi-agent config, runs ``trainer``
    under tune with periodic checkpointing, saves the trial dataframes to
    a pickle file named after the experiment, and returns the analysis.

    ``stop`` is either an int (interpreted as a sampled-steps budget) or a
    complete tune stop dict. Extra ``kwargs`` are forwarded to
    ``tune.run``.
    """
    # When joining an existing cluster, let Ray discover GPUs itself.
    if address is not None:
        num_gpus = None

    if clip_memory:
        # Hard memory caps for cluster runs; these override any
        # explicitly supplied init_* values.
        init_memory = int(300 * GB)
        init_object_store_memory = int(100 * GB)
        init_redis_max_memory = int(50 * GB)

    initialize_ray(
        test_mode=test_mode,
        local_mode=False,
        num_gpus=num_gpus,
        address=address,
        redis_password=redis_password,
        memory=init_memory,
        object_store_memory=init_object_store_memory,
        redis_max_memory=init_redis_max_memory,
        num_cpus=num_cpus
    )

    run_config = {
        # One trial per seed: 0, 100, 200, ...
        "seed": tune.grid_search([s * 100 for s in range(num_seeds)]),
        "env": MultiAgentEnvWrapper,
        "env_config": {"env_name": env_name, "num_agents": num_agents},
        "log_level": "DEBUG" if test_mode else "INFO"
    }
    if extra_config:
        run_config.update(extra_config)

    # An int stop is shorthand for a sampled-steps budget.
    if isinstance(stop, int):
        stop_criteria = {"info/num_steps_sampled": stop}
    else:
        stop_criteria = stop

    analysis = tune.run(
        trainer,
        local_dir=get_local_dir(),
        name=exp_name,
        checkpoint_freq=10,
        keep_checkpoints_num=10,
        checkpoint_score_attr="episode_reward_mean",
        checkpoint_at_end=True,
        stop=stop_criteria,
        config=run_config,
        max_failures=20,
        reuse_actors=False,
        **kwargs
    )

    # Persist the per-trial dataframes next to the working directory.
    result_path = f"{exp_name}-{env_name}-{stop}ts-{num_agents}agents.pkl"
    with open(result_path, "wb") as f:
        pickle.dump(analysis.fetch_trial_dataframes(), f)
        print(f"Result is saved at: <{result_path}>")

    return analysis