Exemplo n.º 1
0
def test_simple_metacurriculum(curriculum_brain_name):
    """Train the continuous 1D environment under a single-brain meta-curriculum.

    :param curriculum_brain_name: Key under which the curriculum is registered
        in the mapping handed to MetaCurriculumTest.
    """
    environment = Simple1DEnvironment(use_discrete=False)

    # While the patch is active, any builtins.open() call returns a mock file
    # whose contents are dummy_curriculum_json_str, so "TestBrain.json" does
    # not need to exist.
    fake_open = patch(
        "builtins.open",
        new_callable=mock_open,
        read_data=dummy_curriculum_json_str,
    )
    with fake_open:
        brain_curriculum = Curriculum("TestBrain.json")

    curricula_by_brain = {curriculum_brain_name: brain_curriculum}
    meta_curriculum = MetaCurriculumTest(curricula_by_brain)

    # NOTE(review): -100.0 is presumably the success threshold - confirm
    # against the signature of _check_environment_trains.
    _check_environment_trains(
        environment, META_CURRICULUM_CONFIG, meta_curriculum, -100.0
    )
Exemplo n.º 2
0
def test_simple_metacurriculum(curriculum_brain_name):
    """Run training on the continuous 1D environment with a meta-curriculum.

    The curriculum is parsed from dummy_curriculum_json_str and registered
    under ``curriculum_brain_name``; no success threshold is enforced.

    :param curriculum_brain_name: Key used for the curriculum configuration
        in the mapping given to MetaCurriculum.
    """
    environment = Simple1DEnvironment([BRAIN_NAME], use_discrete=False)
    parsed_curriculum = json.loads(dummy_curriculum_json_str)
    meta = MetaCurriculum({curriculum_brain_name: parsed_curriculum})
    _check_environment_trains(
        environment,
        TRAINER_CONFIG,
        meta_curriculum=meta,
        success_threshold=None,
    )
def test_subprocess_env_endtoend(num_envs):
    """Train PPO end to end through a SubprocessEnvManager.

    The environment rewards live in separate processes and cannot be read
    from here, so the outcome is checked through the last rewards recorded by
    the first StatsReporter writer, which must be a DebugWriter.

    :param num_envs: Forwarded as the third argument to SubprocessEnvManager.
    """
    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    # Everything that follows runs under try/finally so the manager is closed
    # even when training raises or one of the assertions fails.
    try:
        trainer_config = generate_config(PPO_CONFIG)
        # Run PPO using env_manager
        _check_environment_trains(
            simple_env_factory(0, []),
            trainer_config,
            env_manager=env_manager,
            success_threshold=None,
        )
        # Note we can't check the env's rewards directly (since they're in
        # separate processes) so we check the StatsReporter's debug stat
        # writer's last reward.
        assert isinstance(StatsReporter.writers[0], DebugWriter)
        assert all(val > 0.99
                   for val in StatsReporter.writers[0].get_last_rewards().values())
    finally:
        env_manager.close()
def test_subprocess_env_endtoend(num_envs):
    """Train PPO end to end through a SubprocessEnvManager.

    The environment rewards live in separate processes and cannot be read
    from here, so the outcome is checked through the last rewards recorded by
    the first StatsReporter writer, which must be a DebugWriter.

    :param num_envs: Forwarded as the third argument to SubprocessEnvManager.
    """
    def simple_env_factory(worker_id, config):
        # Both arguments are ignored; every call builds the same discrete
        # "1D" environment.
        env = SimpleEnvironment(["1D"], use_discrete=True)
        return env

    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    # Everything that follows runs under try/finally so the manager is closed
    # even when training raises or one of the assertions fails.
    try:
        # Run PPO using env_manager
        _check_environment_trains(
            simple_env_factory(0, []),
            {"1D": PPO_CONFIG},
            env_manager=env_manager,
            success_threshold=None,
        )
        # Note we can't check the env's rewards directly (since they're in
        # separate processes) so we check the StatsReporter's debug stat
        # writer's last reward.
        assert isinstance(StatsReporter.writers[0], DebugWriter)
        assert all(val > 0.7
                   for val in StatsReporter.writers[0].get_last_rewards().values())
    finally:
        env_manager.close()
Exemplo n.º 5
0
def test_simple_metacurriculum(curriculum_brain_name):
    """Run training on the continuous 1D environment with a meta-curriculum.

    :param curriculum_brain_name: Key used for the curriculum configuration
        (parsed from dummy_curriculum_json_str) in the MetaCurriculum mapping.
    """
    environment = Simple1DEnvironment(use_discrete=False)
    curricula_by_brain = {
        curriculum_brain_name: json.loads(dummy_curriculum_json_str),
    }
    meta = MetaCurriculum(curricula_by_brain)
    # NOTE(review): -100.0 is presumably the success threshold - confirm
    # against the signature of _check_environment_trains.
    _check_environment_trains(environment, TRAINER_CONFIG, meta, -100.0)
Exemplo n.º 6
0
def test_simple_metacurriculum(curriculum_brain_name):
    """Train the continuous SimpleEnvironment with PPO under a meta-curriculum.

    No success threshold is enforced (``success_threshold=None``).

    :param curriculum_brain_name: Key under which dummy_curriculum_config is
        registered in the mapping given to MetaCurriculum.
    """
    environment = SimpleEnvironment([BRAIN_NAME], use_discrete=False)
    curricula_by_brain = {curriculum_brain_name: dummy_curriculum_config}
    meta = MetaCurriculum(curricula_by_brain)
    trainer_settings = {BRAIN_NAME: PPO_CONFIG}
    _check_environment_trains(
        environment,
        trainer_settings,
        meta_curriculum=meta,
        success_threshold=None,
    )