def loader():
    """Load the configured suite environment, optionally adding action noise.

    Reads `domain_name`, `task_name` and `task_kwargs` from the enclosing
    scope and the `visualize_reward` / `action_noise` settings from `FLAGS`.

    Returns:
      The loaded environment, wrapped in `action_noise.Wrapper` when
      `FLAGS.action_noise` is positive.
    """
    environment = suite.load(
        domain_name=domain_name,
        task_name=task_name,
        task_kwargs=task_kwargs)
    environment.task.visualize_reward = FLAGS.visualize_reward
    if FLAGS.action_noise > 0:
        return action_noise.Wrapper(environment, scale=FLAGS.action_noise)
    return environment
def test_error_if_action_bounds_non_finite(self, lower, upper):
    """Checks that `Wrapper` rejects the given action bounds.

    Constructing the wrapper is expected to raise `ValueError` whose message
    is exactly `_BOUNDS_MUST_BE_FINITE` formatted with the action spec.
    """
    spec = self.make_action_spec(lower=lower, upper=upper)
    mock_env = self.make_mock_env(action_spec=spec)
    expected_message = action_noise._BOUNDS_MUST_BE_FINITE.format(
        action_spec=spec)
    with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
        action_noise.Wrapper(mock_env)
def test_action_spec(self):
    """Checks that `action_spec()` is forwarded to the wrapped environment."""
    inner_env = self.make_mock_env()
    wrapper = action_noise.Wrapper(inner_env)
    # `env.action_spec()` is called in `Wrapper.__init__()`, so clear the
    # recorded calls before exercising the method under test.
    inner_env.action_spec.reset_mock()
    returned_spec = wrapper.action_spec()
    inner_env.action_spec.assert_called_once_with()
    self.assertIs(returned_spec, inner_env.action_spec())
def test_action_clipping(self, action, noise):
    """Checks that the noisy action is clipped to the action spec bounds.

    The task's random state is mocked so that `normal()` returns `noise`;
    the wrapped environment must then be stepped with `action + noise`
    clipped to [-1, 1].
    """
    lower_bound = -1.
    upper_bound = 1.

    # Mocked task whose random state yields the requested noise value.
    mock_task = mock.Mock(spec=control.Task)
    mock_task.random = mock.Mock(spec=np.random.RandomState)
    mock_task.random.normal.return_value = noise

    spec = self.make_action_spec(lower=lower_bound, upper=upper_bound)
    inner_env = self.make_mock_env(action_spec=spec)
    inner_env.task = mock_task
    wrapper = action_noise.Wrapper(inner_env)

    clipped_action = np.clip(action + noise, lower_bound, upper_bound)
    returned_step = wrapper.step(action)

    self.assertStepCalledOnceWithCorrectAction(inner_env, clipped_action)
    self.assertIs(returned_step, inner_env.step(clipped_action))
def loader():
    """Load the environment, falling back to the custom suite on failure.

    Reads `domain_name`, `task_name` and `task_kwargs` from the enclosing
    scope. `suite.load` is tried first; if it raises `ValueError`
    (presumably because the domain/task is not part of the standard suite --
    confirm against `suite.load`), `custom_suite.load` is used instead with
    an extra `params` task keyword argument.

    Returns:
      The loaded environment, wrapped in `action_noise.Wrapper` when
      `FLAGS.action_noise` is positive.
    """
    try:
        env = suite.load(
            domain_name=domain_name,
            task_name=task_name,
            task_kwargs=task_kwargs)
    except ValueError:
        # Build a private copy instead of writing into the shared
        # `task_kwargs`: the fallback-only `params` entry must not leak into
        # the enclosing scope or into later `suite.load` calls. This also
        # tolerates `task_kwargs` being None.
        fallback_kwargs = dict(task_kwargs or {})
        fallback_kwargs['params'] = [0.25, 1.0, 8]
        env = custom_suite.load(
            domain_name=domain_name,
            task_name=task_name,
            task_kwargs=fallback_kwargs)
    env.task.visualize_reward = FLAGS.visualize_reward
    if FLAGS.action_noise > 0:
        env = action_noise.Wrapper(env, scale=FLAGS.action_noise)
    return env
def test_step(self, lower, upper, scale):
    """Checks that `step()` adds seeded Gaussian noise and clips the result.

    The expected noise is drawn from a `RandomState` seeded identically to
    the one given to the task, with standard deviation
    `scale * (upper - lower)`.
    """
    seed = 0

    # Task backed by a real, seeded random state.
    mock_task = mock.Mock(spec=control.Task)
    mock_task.random = np.random.RandomState(seed)

    spec = self.make_action_spec(lower=lower, upper=upper)
    inner_env = self.make_mock_env(action_spec=spec)
    inner_env.task = mock_task
    wrapper = action_noise.Wrapper(inner_env, scale=scale)

    # Reproduce the noise from an independent, identically seeded generator.
    noise_std = scale * (upper - lower)
    noise = np.random.RandomState(seed).normal(scale=noise_std)
    action = np.random.RandomState(seed).uniform(lower, upper)
    clipped_action = np.clip(action + noise, lower, upper)

    returned_step = wrapper.step(action)

    self.assertStepCalledOnceWithCorrectAction(inner_env, clipped_action)
    self.assertIs(returned_step, inner_env.step(clipped_action))
def __init__(self, env, symbolic, seed, max_episode_length, action_repeat,
             bit_depth, action_noise_scale=None, render_size=64,
             use_rgbgr=False):
    """Create a dm_control suite environment with optional wrappers.

    Args:
      env: Environment identifier of the form '<domain>-<task>'.
      symbolic: If falsy, the environment is wrapped with `pixels.Wrapper`.
      seed: Passed to the task as its `random` keyword argument.
      max_episode_length: Stored on the instance as `max_episode_length`.
      action_repeat: Stored as `action_repeat`. A negative value selects
        the entry of `CONTROL_SUITE_ACTION_REPEATS` for the domain, or 2
        when the domain has no entry.
      bit_depth: Stored on the instance as `bit_depth`.
      action_noise_scale: If not None, the environment is wrapped with
        `action_noise.Wrapper` using this scale.
      render_size: Side length used for `obs_tuple`; a falsy value falls
        back to 64.
      use_rgbgr: If true, `obs_tuple` has 4 channels instead of 3.

    Raises:
      ValueError: If `env` does not split into exactly two parts on '-'.
    """
    # dm_control is imported here rather than at module level.
    from dm_control import suite
    from dm_control.suite.wrappers import pixels
    from dm_control.suite.wrappers import action_noise

    domain, task = env.split('-')
    self.symbolic = symbolic
    self.render_size = render_size if render_size else 64
    self.use_rgbgr = use_rgbgr
    # Use the normalised size so that a falsy `render_size` (e.g. None)
    # does not end up inside the observation shape.
    channels = 4 if self.use_rgbgr else 3
    self.obs_tuple = (channels, self.render_size, self.render_size)
    self.action_noise_scale = action_noise_scale

    self._env = suite.load(
        domain_name=domain, task_name=task, task_kwargs={'random': seed})
    if not symbolic:
        self._env = pixels.Wrapper(self._env)
    if self.action_noise_scale is not None:
        self._env = action_noise.Wrapper(
            self._env, scale=self.action_noise_scale)
    self.max_episode_length = max_episode_length

    # Look up the recommended repeat once; None means "no recommendation".
    try:
        recommended_repeat = CONTROL_SUITE_ACTION_REPEATS[domain]
    except KeyError:
        recommended_repeat = None

    if action_repeat < 0:
        action_repeat = 2 if recommended_repeat is None else recommended_repeat
    self.action_repeat = action_repeat
    if recommended_repeat is not None and action_repeat != recommended_repeat:
        print(
            'Using action repeat %d; recommended action repeat for domain is %d'
            % (action_repeat, recommended_repeat))
    self.bit_depth = bit_depth
def test_getattr(self, attribute_name):
    """Checks that attribute lookups fall through to the wrapped env."""
    inner_env = self.make_mock_env()
    wrapper = action_noise.Wrapper(inner_env)
    forwarded = getattr(wrapper, attribute_name)
    self.assertIs(forwarded, getattr(inner_env, attribute_name))
def test_observation_spec(self):
    """Checks that `observation_spec()` is forwarded to the wrapped env."""
    inner_env = self.make_mock_env()
    wrapper = action_noise.Wrapper(inner_env)
    returned_spec = wrapper.observation_spec()
    inner_env.observation_spec.assert_called_once_with()
    self.assertIs(returned_spec, inner_env.observation_spec())
def test_reset(self):
    """Checks that `reset()` is forwarded to the wrapped environment."""
    inner_env = self.make_mock_env()
    wrapper = action_noise.Wrapper(inner_env)
    returned_step = wrapper.reset()
    inner_env.reset.assert_called_once_with()
    self.assertIs(returned_step, inner_env.reset())