def test_rendering(self):
    # Dict observations (the default CMA-ES state) cannot be rendered
    bench = CMAESBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()

    # Tuple observations cannot be rendered either
    bench = CMAESBenchmark()

    def dummy():
        return [1, [2, 3]]

    bench.config.state_method = dummy
    bench.config.observation_space = gym.spaces.Tuple(
        (
            gym.spaces.Discrete(2),
            gym.spaces.Box(low=np.array([-1, 1]), high=np.array([5, 5])),
        )
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()

    # One-dimensional Box observations render to an RGB image
    def dummy2():
        return [0.5]

    bench.config.state_method = dummy2
    bench.config.observation_space = gym.spaces.Box(
        low=np.array([0]), high=np.array([1])
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # Box observations tracked in intervals of 2 steps
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # Discrete observations
    class discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.Discrete(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return 1

        def step(self, action):
            return 1, 1, 1, 1

    env = discrete_obs_env()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # MultiDiscrete observations
    class multi_discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiDiscrete([2, 3])
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 2]

        def step(self, action):
            return [1, 2], 1, 1, 1

    env = multi_discrete_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # MultiBinary observations
    class multi_binary_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiBinary(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 1]

        def step(self, action):
            return [1, 1], 1, 1, 1

    env = multi_binary_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)
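# A minimal standalone sketch (not part of the test suite) of how the wrapper
# might be used to inspect state progression outside a test, assuming
# render_state_tracking() returns an RGB array as asserted above; the output
# filename is purely illustrative.
#
#   import matplotlib.pyplot as plt
#   from dacbench.benchmarks import LubyBenchmark
#   from dacbench.wrappers import StateTrackingWrapper
#
#   env = StateTrackingWrapper(LubyBenchmark().get_environment())
#   env.reset()
#   for _ in range(5):
#       env.step(env.action_space.sample())
#   img = env.render_state_tracking()
#   plt.imsave("state_tracking.png", img)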
def test_get_env(self):
    bench = CMAESBenchmark()
    env = bench.get_environment()
    self.assertTrue(issubclass(type(env), CMAESEnv))
import numpy as np

from examples.example_utils import make_chainer_a3c, train_chainer
from dacbench.benchmarks import CMAESBenchmark
from dacbench.wrappers import EpisodeTimeWrapper


# Helper function to flatten the observation space
def flatten(li):
    return [value for sublist in li for value in sublist]


# Make CMA-ES environment
# We use the configuration from the paper "Learning Step-Size Adaptation
# in CMA-ES" by Shala et al.
bench = CMAESBenchmark()
env = bench.get_environment()

# Wrap environment to track time
# Here we also want the mean of each 2-step interval
env = EpisodeTimeWrapper(env, 2)

# Make chainer agent
space_array = [
    env.observation_space[k].low for k in list(env.observation_space.spaces.keys())
]
obs_size = np.array(flatten(space_array)).size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Train agent for 10 episodes
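# A minimal sketch of the training call, assuming the train_chainer helper
# imported above takes the agent and the wrapped environment (the episode
# count of 10 is assumed to be its default; the full signature is not shown here).
train_chainer(agent, env)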