Example #1
def test_simple_asymm_ghost_fails(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"],
        use_discrete=use_discrete)
    # This config should fail because the team that is not learning when both have reached
    # max step should be executing the initial, untrained policy.
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=0.0,
        save_steps=5000,
        swap_steps=5000,
        team_change=2000,
    )
    config = attr.evolve(PPO_CONFIG,
                         self_play=self_play_settings,
                         max_steps=2000)
    _check_environment_trains(env, {
        BRAIN_NAME: config,
        brain_name_opp: config
    },
                              success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards)
        for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold
               for reward in processed_rewards) and any(
                   reward < success_threshold for reward in processed_rewards)
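These snippets are lifted from the ML-Agents trainers test suite and rely on a shared harness that is not reproduced here (BRAIN_NAME, SimpleEnvironment, _check_environment_trains, default_reward_processor, and the PPO/SAC base configs). A rough sketch of the imports they assume follows; the exact module paths move around between ml-agents releases, so treat this as a starting point rather than a verbatim header.

# Sketch only -- module paths differ across ml-agents releases; check your installed version.
import attr

from mlagents.trainers.settings import (
    BehavioralCloningSettings,
    GAILSettings,
    RewardSignalType,
    SelfPlaySettings,
)
from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
# BRAIN_NAME, PPO_CONFIG, SAC_CONFIG, _check_environment_trains and
# default_reward_processor are defined in the test modules themselves
# (e.g. mlagents.trainers.tests.test_simple_rl in older releases).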
Example #2
def test_gail_visual_ppo(simple_record, action_sizes):
    demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        action_sizes=action_sizes,
        step_size=0.2,
    )
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32,
                                            demo_path=demo_path)
    }
    hyperparams = attr.evolve(PPO_TF_CONFIG.hyperparameters,
                              learning_rate=3e-4)
    config = attr.evolve(
        PPO_TF_CONFIG,
        reward_signals=reward_signals,
        hyperparameters=hyperparams,
        behavioral_cloning=bc_settings,
        max_steps=1000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #3
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_CONFIG.network_settings,
        vis_encode_type=EncoderType(vis_encode_type))
    new_hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters,
        batch_size=16,
        learning_rate=3e-4,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=100,
    )
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
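Most of these tests derive their configuration by copying an immutable base config with attr.evolve rather than editing it in place. A minimal, self-contained sketch of that pattern, using hypothetical Hyperparameters and TrainerSettings classes in place of the real ml-agents settings objects:

import attr


@attr.s(auto_attribs=True, frozen=True)
class Hyperparameters:
    # Hypothetical stand-in for the real hyperparameter settings class.
    learning_rate: float = 3.0e-4
    batch_size: int = 1024


@attr.s(auto_attribs=True, frozen=True)
class TrainerSettings:
    # Hypothetical stand-in for PPO_CONFIG / SAC_CONFIG.
    hyperparameters: Hyperparameters = attr.Factory(Hyperparameters)
    max_steps: int = 25000


BASE_CONFIG = TrainerSettings()

# attr.evolve returns a modified copy; the frozen base config is left untouched,
# so every test can start from the same defaults.
new_hyperparams = attr.evolve(BASE_CONFIG.hyperparameters, batch_size=16)
config = attr.evolve(BASE_CONFIG, hyperparameters=new_hyperparams, max_steps=100)

assert BASE_CONFIG.hyperparameters.batch_size == 1024
assert config.hyperparameters.batch_size == 16

Because the base objects are frozen, the tests can safely share module-level constants such as PPO_CONFIG and SAC_CONFIG without one test's overrides leaking into another.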
Example #4
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=(0, 1),
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_TF_CONFIG.network_settings,
        vis_encode_type=EncoderType(vis_encode_type))
    new_hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=16,
        learning_rate=3e-4,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=200,
        framework=FrameworkType.TENSORFLOW,
    )
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
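Examples #3 and #4 are the same test before and after the suite moved from a use_discrete flag to an action_sizes tuple. As far as one can tell from these snippets, the tuple encodes (continuous action size, discrete action size); the constants below summarize that reading, which is an inference from the examples rather than documented API.

# Inferred convention: action_sizes = (continuous_size, discrete_size)
CONTINUOUS_ONLY = (1, 0)  # roughly the old use_discrete=False
DISCRETE_ONLY = (0, 1)    # roughly the old use_discrete=True, cf. Example #4
HYBRID = (1, 1)           # one continuous action plus one discrete branch, cf. the hybrid tests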
Example #5
def test_2d_ppo(use_discrete):
    env = SimpleEnvironment([BRAIN_NAME],
                            use_discrete=use_discrete,
                            action_size=2,
                            step_size=0.5)
    config = generate_config(PPO_CONFIG)
    _check_environment_trains(env, config)
Example #6
def test_simple_asymm_ghost_fails(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"],
        use_discrete=use_discrete)
    # This config should fail because the team that is not learning when both have reached
    # max step should be executing the initial, untrained policy.
    override_vals = {
        "max_steps": 2000,
        "self_play": {
            "play_against_latest_model_ratio": 0.0,
            "save_steps": 5000,
            "swap_steps": 5000,
            "team_change": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    config[brain_name_opp] = config[BRAIN_NAME]
    _check_environment_trains(env, config, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards)
        for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold
               for reward in processed_rewards) and any(
                   reward < success_threshold for reward in processed_rewards)
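The older examples (#5–#7, #10, #16 and #19) build a plain dict of per-behavior trainer configs through a generate_config helper whose implementation is not shown in this listing. Purely as an illustration of the idea, a hypothetical equivalent might deep-merge the override values into a copy of the base config and key the result by behavior name:

import copy


def generate_config_sketch(base_config: dict, override_vals: dict = None,
                           brain_name: str = "SomeBrain") -> dict:
    """Hypothetical stand-in for the test suite's generate_config helper."""

    def deep_merge(dst: dict, src: dict) -> None:
        for key, value in src.items():
            if isinstance(value, dict) and isinstance(dst.get(key), dict):
                deep_merge(dst[key], value)
            else:
                dst[key] = value

    trainer_config = copy.deepcopy(base_config)
    if override_vals:
        deep_merge(trainer_config, override_vals)
    # _check_environment_trains expects a mapping from behavior name to config.
    return {brain_name: trainer_config}

Example #6 then copies the resulting entry to a second behavior name (config[brain_name_opp] = config[BRAIN_NAME]), which is consistent with the helper returning a dict keyed by behavior name.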
Example #7
def test_gail_visual_ppo(simple_record, use_discrete):
    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        use_discrete=use_discrete,
        step_size=0.2,
    )
    override_vals = {
        "max_steps": 500,
        "learning_rate": 3.0e-4,
        "behavioral_cloning": {
            "demo_path": demo_path,
            "strength": 1.0,
            "steps": 1000
        },
        "reward_signals": {
            "gail": {
                "strength": 1.0,
                "gamma": 0.99,
                "encoding_size": 32,
                "demo_path": demo_path,
            }
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.9)
Example #8
def test_2d_sac(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    new_hyperparams = attr.evolve(SAC_CONFIG.hyperparameters, buffer_init_steps=2000)
    config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
Example #9
def test_gail_visual_sac(simple_record, use_discrete):
    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        use_discrete=use_discrete,
        step_size=0.2,
    )
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32,
                                            demo_path=demo_path)
    }
    hyperparams = attr.evolve(SAC_CONFIG.hyperparameters,
                              learning_rate=3e-4,
                              batch_size=16)
    config = attr.evolve(
        SAC_CONFIG,
        reward_signals=reward_signals,
        hyperparameters=hyperparams,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #10
def test_2d_sac(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    override_vals = {"buffer_init_steps": 2000, "max_steps": 4000}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.8)
Example #11
def test_hybrid_visual_ppo(num_visual):
    env = SimpleEnvironment([BRAIN_NAME],
                            num_visual=num_visual,
                            num_vector=0,
                            action_sizes=(1, 1))
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Example #12
def test_2d_sac(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
Example #13
def test_2d_ppo(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    new_hyperparams = attr.evolve(
        PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
    )
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #14
def test_simple_ghost(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
    )
    config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #15
def test_simple_metacurriculum(curriculum_brain_name):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=False)
    curriculum_config = json.loads(dummy_curriculum_json_str)
    mc = MetaCurriculum({curriculum_brain_name: curriculum_config})
    trainer_config = yaml.safe_load(TRAINER_CONFIG)
    _check_environment_trains(env,
                              trainer_config,
                              meta_curriculum=mc,
                              success_threshold=None)
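Example #15 depends on dummy_curriculum_json_str and a TRAINER_CONFIG YAML string that this listing does not reproduce. For orientation only: in the ml-agents versions that still shipped MetaCurriculum, a single-brain curriculum config generally looked something like the block below; the keys and values here are illustrative assumptions, not the test's actual constant.

import json

# Illustrative shape only -- not the real dummy_curriculum_json_str from the test module.
example_curriculum_json_str = """
{
    "measure": "reward",
    "thresholds": [0.5, 0.6, 0.7],
    "min_lesson_length": 5,
    "signal_smoothing": true,
    "parameters": {"param1": [0.7, 0.5, 0.3, 0.1]}
}
"""
curriculum_config = json.loads(example_curriculum_json_str)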
Example #16
def test_visual_sac(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    override_vals = {"batch_size": 16, "learning_rate": 3e-4}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
Example #17
def test_visual_ppo(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams)
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #18
def test_var_len_obs_ppo(num_vis, num_vector, num_var_len, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_vis,
        num_vector=num_vector,
        num_var_len=num_var_len,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Example #19
def test_simple_ghost(use_discrete):
    env = SimpleEnvironment([BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"],
                            use_discrete=use_discrete)
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_latest_model_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)
Example #20
def test_visual_sac(num_visual, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(SAC_TORCH_CONFIG.hyperparameters,
                                  batch_size=16,
                                  learning_rate=3e-4)
    config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Example #21
def test_simple_ghost(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"],
                            action_sizes=action_sizes)
    self_play_settings = SelfPlaySettings(play_against_latest_model_ratio=1.0,
                                          save_steps=2000,
                                          swap_steps=2000)
    config = attr.evolve(
        PPO_TF_CONFIG,
        self_play=self_play_settings,
        max_steps=2500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #22
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #23
def test_gail(simple_record, use_discrete, trainer_config):
    demo_path = simple_record(use_discrete)
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    config = attr.evolve(
        trainer_config,
        reward_signals=reward_signals,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #24
def test_simple_asymm_ghost(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0,
        save_steps=10000,
        swap_steps=10000,
        team_change=400,
    )
    config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=4000)
    _check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
Example #25
def test_2d_ppo(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME],
                            action_sizes=action_sizes,
                            step_size=0.8)
    new_hyperparams = attr.evolve(PPO_TF_CONFIG.hyperparameters,
                                  batch_size=64,
                                  buffer_size=640)
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=new_hyperparams,
        max_steps=10000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #26
def test_hybrid_visual_sac(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=128,
        learning_rate=3.0e-4,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=3000
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Example #27
def test_hybrid_sac(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)

    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=256,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
    )
    check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
    )
Example #28
def test_hybrid_visual_ppo(num_visual, training_seed):
    env = SimpleEnvironment([BRAIN_NAME],
                            num_visual=num_visual,
                            num_vector=0,
                            action_sizes=(1, 1))
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        batch_size=64,
        buffer_size=1024,
        learning_rate=1e-4,
    )
    config = attr.evolve(PPO_TORCH_CONFIG,
                         hyperparameters=new_hyperparams,
                         max_steps=8000)
    check_environment_trains(env, {BRAIN_NAME: config},
                             training_seed=training_seed)
Example #29
def test_visual_ppo(num_visual, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_TF_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=new_hyperparams,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #30
def test_visual_sac(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters,
                                  batch_size=16,
                                  learning_rate=3e-4)
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})