Example #1
def test_simple_metacurriculum(curriculum_brain_name):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=False)
    curriculum_config = json.loads(dummy_curriculum_json_str)
    mc = MetaCurriculum({curriculum_brain_name: curriculum_config})
    # success_threshold=None skips the built-in success assertion; the test
    # only checks that training runs end to end with a meta-curriculum.
    _check_environment_trains(env,
                              TRAINER_CONFIG,
                              meta_curriculum=mc,
                              success_threshold=None)
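The snippet parses dummy_curriculum_json_str into a per-brain curriculum config. For orientation, a minimal sketch of what such a fixture might contain, assuming the classic ML-Agents curriculum schema; the keys and values below are illustrative, not the test's actual fixture:

dummy_curriculum_json_str = """
{
    "measure": "reward",
    "thresholds": [0.1, 0.3, 0.5],
    "min_lesson_length": 100,
    "signal_smoothing": true,
    "parameters": {"param1": [0.7, 0.5, 0.3, 0.1]}
}
"""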
Example #2
def test_visual_sac(num_visual, use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME],
                              use_discrete=use_discrete,
                              num_visual=num_visual,
                              num_vector=0)
    override_vals = {"batch_size": 16, "learning_rate": 3e-4}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
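generate_config comes from the test module and is not shown in these excerpts. A minimal hypothetical stand-in, assuming it deep-copies a base trainer config dict and overlays the per-test hyperparameter overrides (the real helper may instead parse a YAML string):

import copy

def generate_config(base_config, override_vals=None):
    # Hypothetical stand-in: copy the base config and merge per-test
    # overrides into the section keyed by the brain name.
    config = copy.deepcopy(base_config)
    if override_vals is not None:
        config[BRAIN_NAME].update(override_vals)
    return config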
Example #3
def test_visual_ppo(num_visual, use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME],
                              use_discrete=use_discrete,
                              num_visual=num_visual,
                              num_vector=0)
    override_vals = {"learning_rate": 3.0e-4}
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)
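test_visual_ppo and test_visual_sac take num_visual and use_discrete as arguments, so they are presumably driven by pytest parametrization that the excerpts omit. A sketch of how such a test might be decorated; the parameter values are assumptions:

import pytest

@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("use_discrete", [True, False])
def test_visual_ppo(num_visual, use_discrete):
    ...  # body as in Example #3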
Example #4
def test_simple_ghost_fails(use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    _check_environment_trains(env, GHOST_CONFIG_FAIL, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.99
    # With the failing self-play config, one team should dominate: expect at
    # least one processed reward above the threshold and at least one below.
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
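default_reward_processor is also supplied by the test module. A plausible stand-in, assuming it summarizes each agent's run by averaging its last few episode rewards; the real window size may differ:

import numpy as np

def default_reward_processor(rewards, last_n_rewards=5):
    # Hypothetical stand-in: mean of the final few episode rewards.
    return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()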
Example #5
def test_simple_ghost(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"],
                              use_discrete=use_discrete)
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_current_self_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)
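Here swap_steps (2000) is below max_steps (2500), so the ghosted team receives the learned policy at least once before training ends and both teams can reach the reward threshold. Compare Example #7, where swap_steps (4000) exceeds max_steps, the swap never happens, and the test instead asserts that the final rewards are asymmetric.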
Example #6
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    override_vals = {
        "learning_rate": 3.0e-4,
        "vis_encode_type": vis_encode_type,
        "max_steps": 500,
        "summary_freq": 100,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, config, success_threshold=0.5)
Example #7
def test_simple_ghost_fails(use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    # This config should fail because the ghosted policy is never swapped
    # with a competent policy: the first swap (swap_steps=4000) would only
    # occur after max_steps (2500) is reached.
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_current_self_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 4000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
Example #8
def test_simple_sac(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(SAC_CONFIG)
    _check_environment_trains(env, config)
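Every example funnels into _check_environment_trains, whose body is not shown. In outline, and purely as an assumption about its behavior rather than the actual implementation, it runs a short training session against the in-memory environment and then asserts on the processed final rewards:

def _check_environment_trains(env, trainer_config, meta_curriculum=None,
                              success_threshold=0.9):
    # Hypothetical outline; run_training is an assumed helper.
    run_training(env, trainer_config, meta_curriculum)
    if success_threshold is not None:
        # success_threshold=None (used by the *_fails tests) skips this check.
        for rewards in env.final_rewards.values():
            assert default_reward_processor(rewards) > success_threshold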
Example #9
def test_simple_ghost(use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    _check_environment_trains(env, GHOST_CONFIG_PASS)
Example #10
def test_simple_sac(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    _check_environment_trains(env, SAC_CONFIG)
def simple_env_factory(worker_id, config):
    # worker_id and config are accepted to satisfy the factory signature
    # expected by the environment manager, but are unused here.
    env = Simple1DEnvironment(["1D"], use_discrete=True)
    return env
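A minimal usage sketch for the factory: whatever spawns the worker environments calls it with a worker id and an optional config, so we can imitate that call convention directly (the two-worker count here is arbitrary):

envs = [simple_env_factory(worker_id=i, config=None) for i in range(2)]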