示例#1
0
def test_bcmodule_defaults():
    """BC module should use defaults, then honor explicit overrides."""
    specs = mb.create_mock_3dball_behavior_specs()

    # Only a demo path supplied: default hyperparameters must apply.
    settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH)
    module = create_bc_module(specs, settings, False, False)
    assert module.num_epoch == 3
    assert module.batch_size == TrainerSettings().hyperparameters.batch_size

    # Unusual explicit values must win over the defaults.
    settings = BehavioralCloningSettings(
        demo_path=CONTINUOUS_DEMO_PATH, num_epoch=100, batch_size=10000
    )
    module = create_bc_module(specs, settings, False, False)
    assert module.num_epoch == 100
    assert module.batch_size == 10000
示例#2
0
def test_gail_cc(trainer_config, gail_dummy_config):
    """GAIL reward signal should evaluate and update on a continuous-control setup."""
    trainer_config.behavioral_cloning = BehavioralCloningSettings(
        demo_path=CONTINUOUS_PATH
    )
    opt = create_optimizer_mock(
        trainer_config, gail_dummy_config, False, False, False
    )
    reward_signal_eval(opt, "gail")
    reward_signal_update(opt, "gail")
示例#3
0
def test_gail_visual_sac(simple_record, use_discrete):
    """SAC with GAIL + behavioral cloning should solve a visual-only environment."""
    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        use_discrete=use_discrete,
        step_size=0.2,
    )
    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    # Tune SAC hyperparameters for this small visual task.
    tuned = attr.evolve(
        SAC_TF_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        reward_signals=signals,
        hyperparameters=tuned,
        behavioral_cloning=cloning,
        max_steps=500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
示例#4
0
def test_bcmodule_dc_visual_update(is_sac):
    """A BC update on discrete/visual specs should report float32 statistics."""
    specs = mb.create_mock_banana_behavior_specs()
    settings = BehavioralCloningSettings(demo_path=DISCRETE_DEMO_PATH)
    module = create_bc_module(specs, settings, False, is_sac)
    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
示例#5
0
def test_bcmodule_rnn_update(is_sac):
    """A BC update with the recurrent policy enabled should report float32 stats."""
    specs = mb.create_mock_3dball_behavior_specs()
    settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH)
    # Third argument True -> build the module with an RNN policy.
    module = create_bc_module(specs, settings, True, is_sac)
    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
示例#6
0
def test_bcmodule_update(is_sac):
    """A single BC update should produce purely float statistics."""
    specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits next to this test module.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo)
    module = create_bc_module(specs, settings, False, is_sac)
    assert_stats_are_float(module.update())
示例#7
0
def test_bcmodule_rnn_update(is_sac):
    """A BC update with the recurrent policy enabled should report float32 stats."""
    specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits next to this test module.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo)
    # Third argument True -> build the module with an RNN policy.
    module = create_bc_module(specs, settings, True, is_sac)
    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
示例#8
0
def test_bcmodule_dc_visual_update(is_sac):
    """A BC update on a discrete/visual mock brain should report float32 stats."""
    brain = mb.create_mock_banana_brain()
    # The demo file sits next to this test module.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
    settings = BehavioralCloningSettings(demo_path=demo)
    module = create_bc_module(brain, settings, False, is_sac)
    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
示例#9
0
def test_bcmodule_defaults():
    """BC module should use defaults, then honor explicit overrides."""
    specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits next to this test module.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"

    # Only a demo path supplied: default hyperparameters must apply.
    settings = BehavioralCloningSettings(demo_path=demo)
    module = create_bc_module(specs, settings, False, False)
    assert module.num_epoch == 3
    assert module.batch_size == TrainerSettings().hyperparameters.batch_size

    # Unusual explicit values must win over the defaults.
    settings = BehavioralCloningSettings(
        demo_path=demo,
        num_epoch=100,
        batch_size=10000,
    )
    module = create_bc_module(specs, settings, False, False)
    assert module.num_epoch == 100
    assert module.batch_size == 10000
示例#10
0
def test_bcmodule_constant_lr_update(is_sac):
    """With steps=0 the BC learning rate must stay constant across updates."""
    specs = mb.create_mock_3dball_behavior_specs()
    # steps=0 disables the linear learning-rate decay schedule.
    settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH, steps=0)
    module = create_bc_module(specs, settings, False, is_sac)
    for value in module.update().values():
        assert isinstance(value, np.float32)

    lr_before = module.current_lr
    module.update()
    assert module.current_lr == lr_before
示例#11
0
def test_bcmodule_linear_lr_update(is_sac):
    """With a finite step budget the BC learning rate must decay linearly."""
    specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits next to this test module.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo, steps=100)
    module = create_bc_module(specs, settings, False, is_sac)
    # Pretend 10 of the 100 steps have elapsed;
    # the expected decay is 10/100 * 0.0003 = 0.00003.
    module.policy.get_current_step = MagicMock(return_value=10)
    lr_before = module.current_lr
    module.update()
    assert lr_before - 0.00003 == pytest.approx(module.current_lr, abs=0.01)
def test_gail(simple_record, use_discrete, trainer_config):
    """A trainer config extended with GAIL + BC should solve the simple env."""
    demo_path = simple_record(use_discrete)
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    config = attr.evolve(
        trainer_config,
        reward_signals=signals,
        behavioral_cloning=cloning,
        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
示例#13
0
def test_bcmodule_constant_lr_update(is_sac):
    """With steps=0 the BC learning rate must stay constant across updates."""
    specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits next to this test module; steps=0 disables decay.
    demo = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo, steps=0)
    module = create_bc_module(specs, settings, False, is_sac)
    for value in module.update().values():
        assert isinstance(value, np.float32)

    lr_before = module.current_lr
    module.update()
    assert module.current_lr == lr_before
示例#14
0
def test_gail(simple_record, action_sizes, trainer_config):
    """A TF trainer config extended with GAIL + BC should solve the simple env."""
    demo_path = simple_record(action_sizes)
    env = SimpleEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=0.2
    )
    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    config = attr.evolve(
        trainer_config,
        reward_signals=signals,
        behavioral_cloning=cloning,
        max_steps=500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
示例#15
0
def test_gail_visual_ppo(simple_record, action_sizes):
    """Torch PPO with GAIL + behavioral cloning should solve a visual-only env."""
    demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        action_sizes=action_sizes,
        step_size=0.2,
    )
    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
    signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    # Raise the learning rate so the small visual task converges quickly.
    tuned = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        reward_signals=signals,
        hyperparameters=tuned,
        behavioral_cloning=cloning,
        max_steps=1000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)