def test_create_manager(): run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml)) param_manager = EnvironmentParameterManager( run_options.environment_parameters, 1337, False ) assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100 assert param_manager.get_current_lesson_number() == { "param_1": 0, "param_2": 0, "param_3": 0, } assert param_manager.get_current_samplers() == { "param_1": ConstantSettings(seed=1337, value=1), "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), } # Not enough episodes completed assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 99}, ) == (False, False) # Not enough episodes reward assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1] * 101}, ) == (False, False) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, ) == (True, True) assert param_manager.get_current_lesson_number() == { "param_1": 1, "param_2": 0, "param_3": 0, } param_manager_2 = EnvironmentParameterManager( run_options.environment_parameters, 1337, restore=True ) # The use of global status should make it so that the lesson numbers are maintained assert param_manager_2.get_current_lesson_number() == { "param_1": 1, "param_2": 0, "param_3": 0, } # No reset required assert param_manager.update_lessons( trainer_steps={"fake_behavior": 700}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [0] * 101}, ) == (True, False) assert param_manager.get_current_samplers() == { "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), }
def test_curriculum_raises_all_completion_criteria_conversion(): with pytest.warns(TrainerConfigWarning): run_options = RunOptions.from_dict( yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml) ) param_manager = EnvironmentParameterManager( run_options.environment_parameters, 1337, False ) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, ) == (False, False) assert param_manager.get_current_lesson_number() == {"param_1": 2}