def test_invalid_action_spaces(self):
     """Test that policy raises error if passed a box obs space."""
     env = GymEnv(DummyDictEnv(act_space_type='box'))
     with pytest.raises(ValueError):
         CategoricalCNNPolicy(env=env,
                              kernel_sizes=(3, ),
                              hidden_channels=(3, ))
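DummyDictEnv itself is a fixture from garage's test suite; the examples on this page only exercise it. For orientation, the following is a rough, illustrative sketch of what a goal-conditioned dict-observation environment of that kind can look like, written against the classic gym API. The class name, shapes, and reward threshold are assumptions for illustration, not garage's actual fixture.

import numpy as np
import gym
from gym import spaces


class MinimalDictEnv(gym.Env):
    """Illustrative stand-in for a dict-observation, goal-conditioned env."""

    def __init__(self, act_space_type='discrete'):
        goal = spaces.Box(-1.0, 1.0, (3, ), dtype=np.float32)
        self.observation_space = spaces.Dict({
            'observation': spaces.Box(-1.0, 1.0, (4, ), dtype=np.float32),
            'achieved_goal': goal,
            'desired_goal': goal,
        })
        if act_space_type == 'box':
            self.action_space = spaces.Box(-1.0, 1.0, (1, ), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        obs = self.observation_space.sample()
        reward = self.compute_reward(obs['achieved_goal'], obs['desired_goal'], {})
        return obs, reward, False, {}

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Sparse goal-reaching reward, following the gym.GoalEnv convention.
        return -float(np.linalg.norm(achieved_goal - desired_goal) > 0.05)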
Example #2
    def test_dict_space(self):
        ext.set_seed(0)

        # A dummy dict env
        dummy_env = DummyDictEnv()
        dummy_act = dummy_env.action_space
        dummy_act_sample = dummy_act.sample()

        # A dummy dict env wrapped by garage.tf
        tf_env = TfEnv(dummy_env)
        tf_act = tf_env.action_space
        tf_obs = tf_env.observation_space

        # flat_dim
        assert tf_act.flat_dim == tf_act.flatten(dummy_act_sample).shape[-1]

        # flat_dim_with_keys
        assert tf_obs.flat_dim == tf_obs.flat_dim_with_keys(
            iter(["achieved_goal", "desired_goal", "observation"]))

        # un/flatten
        assert tf_act.unflatten(
            tf_act.flatten(dummy_act_sample)) == dummy_act_sample

        # un/flatten_n
        samples = [dummy_act.sample() for _ in range(10)]
        assert tf_act.unflatten_n(tf_act.flatten_n(samples)) == samples

        # un/flatten_with_keys
        assert tf_act.unflatten_with_keys(
            tf_act.flatten_with_keys(dummy_act_sample, iter(["action"])),
            iter(["action"]))
Example #3
 def setup_method(self):
     self.env = GymEnv(DummyDictEnv())
     self.obs = self.env.reset()[0]
     self._replay_k = 4
     self.replay_buffer = HERReplayBuffer(env_spec=self.env.spec,
                                          capacity_in_transitions=10,
                                          replay_k=self._replay_k,
                                          reward_fn=self.env.compute_reward)
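Here replay_k controls how many relabeled copies of each transition HER stores, using achieved goals sampled from later in the same episode (the "future" strategy of Andrychowicz et al., 2017). Below is a minimal sketch of that idea; the function name and path layout are illustrative and not garage's internals.

import numpy as np


def her_future_relabel(path, replay_k, reward_fn):
    """Yield original plus relabeled (obs, action, reward, next_obs) tuples."""
    horizon = len(path['actions'])
    for t in range(horizon):
        # Keep the original transition with its true desired goal.
        yield (path['observations'][t], path['actions'][t],
               path['rewards'][t], path['next_observations'][t])
        if t == horizon - 1:
            continue  # the final transition is stored only once
        # Add replay_k copies whose goal is an achieved goal from the future.
        for f in np.random.randint(t + 1, horizon, size=replay_k):
            new_goal = path['next_observations'][f]['achieved_goal']
            obs = dict(path['observations'][t], desired_goal=new_goal)
            next_obs = dict(path['next_observations'][t], desired_goal=new_goal)
            reward = reward_fn(next_obs['achieved_goal'], new_goal, None)
            yield (obs, path['actions'][t], reward, next_obs)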
Example #4
 def test_does_not_support_dict_obs_space(self):
     """Test that policy raises error if passed a dict obs space."""
     env = GymEnv(DummyDictEnv(act_space_type='discrete'))
     with pytest.raises(ValueError,
                        match=('CNN policies do not support '
                               'with akro.Dict observation spaces.')):
         CategoricalCNNPolicy(env=env,
                              kernel_sizes=(3, ),
                              hidden_channels=(3, ))
 def test_does_not_support_dict_obs_space(self, filters, strides, padding,
                                          hidden_sizes):
     """Test that policy raises error if passed a dict obs space."""
     env = GymEnv(DummyDictEnv(act_space_type='discrete'))
     with pytest.raises(ValueError):
         CategoricalCNNPolicy(env_spec=env.spec,
                              filters=filters,
                              strides=strides,
                              padding=padding,
                              hidden_sizes=hidden_sizes)
Example #6
    def test_get_action_dict_space(self):
        env = GymEnv(DummyDictEnv(obs_space_type='box', act_space_type='box'))
        policy = GaussianMLPPolicy(env_spec=env.spec)
        obs = env.reset()[0]

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #7
    def test_get_action_dict_space(self):
        env = GymEnv(DummyDictEnv(obs_space_type='box', act_space_type='box'))
        policy = GaussianGRUPolicy(env_spec=env.spec,
                                   hidden_dim=4,
                                   state_include_action=False)
        policy.reset(do_resets=None)
        obs = env.reset()[0]

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #8
    def test_get_action_dict_space(self):
        """Test if observations from dict obs spaces are properly flattened."""
        env = GymEnv(DummyDictEnv(obs_space_type='box', act_space_type='box'))
        policy = TanhGaussianMLPPolicy(env_spec=env.spec,
                                       hidden_nonlinearity=None,
                                       hidden_sizes=(1, ),
                                       hidden_w_init=nn.init.ones_,
                                       output_w_init=nn.init.ones_)
        obs = env.reset()[0]

        action, _ = policy.get_action(obs)
        assert env.action_space.shape == action.shape

        actions, _ = policy.get_actions(np.array([obs, obs]))
        for action in actions:
            assert env.action_space.shape == action.shape
Example #9
    def test_algo_with_goal_without_es(self):
        # Test that the sampler works properly when the algorithm uses
        # goal-conditioned observations but has no exploration policy.
        env = DummyDictEnv()
        policy = DummyPolicy(env)
        replay_buffer = SimpleReplayBuffer(env_spec=env,
                                           size_in_transitions=int(1e6),
                                           time_horizon=100)
        algo = DummyOffPolicyAlgo(env_spec=env,
                                  qf=None,
                                  replay_buffer=replay_buffer,
                                  policy=policy,
                                  exploration_strategy=None)

        sampler = OffPolicyVectorizedSampler(algo, env, 1, no_reset=True)
        sampler.start_worker()
        sampler.obtain_samples(0, 30)
    def test_get_action(self, obs_dim, action_dim, obs_type):
        assert obs_type in ['discrete', 'dict']
        if obs_type == 'discrete':
            env = GymEnv(
                DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        else:
            env = GymEnv(
                DummyDictEnv(obs_space_type='box', act_space_type='discrete'))
        policy = CategoricalMLPPolicy(env_spec=env.spec)
        obs = env.reset()[0]
        if obs_type == 'discrete':
            obs = obs.flatten()
        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #11
    def test_q_vals_goal_conditioned(self):
        env = GarageEnv(DummyDictEnv())
        with mock.patch(('garage.tf.q_functions.'
                         'continuous_mlp_q_function.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs = np.concatenate(
            (obs['observation'], obs['desired_goal'], obs['achieved_goal']),
            axis=-1)
        act = np.full((1, ), 0.5).flatten()

        expected_output = np.full((1, ), 0.5)

        outputs = qf.get_qval([obs], [act])
        assert np.array_equal(outputs[0], expected_output)

        outputs = qf.get_qval([obs, obs, obs], [act, act, act])
        for output in outputs:
            assert np.array_equal(output, expected_output)
    def test_q_vals_input_include_goal(self):
        env = TfEnv(DummyDictEnv())
        with mock.patch(('garage.tf.q_functions.'
                         'continuous_mlp_q_function_with_model.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousMLPQFunctionWithModel(env_spec=env.spec,
                                                 input_include_goal=True)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs = np.concatenate((obs['observation'], obs['desired_goal']),
                             axis=-1)
        act = np.full((1, ), 0.5).flatten()

        expected_output = np.full((1, ), 0.5)
        obs_ph, act_ph = qf.inputs

        outputs = qf.get_qval([obs], [act])
        assert np.array_equal(outputs[0], expected_output)

        outputs = qf.get_qval([obs, obs, obs], [act, act, act])
        for output in outputs:
            assert np.array_equal(output, expected_output)
Example #13
    def test_get_action(self, obs_dim, action_dim, obs_type):
        """Test get_action method"""
        assert obs_type in ['box', 'dict']
        if obs_type == 'box':
            env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        else:
            env = GymEnv(
                DummyDictEnv(obs_space_type='box', act_space_type='box'))

        policy = ContinuousMLPPolicy(env_spec=env.spec)

        env.reset()
        obs = env.step(1).observation
        if obs_type == 'box':
            obs = obs.flatten()

        action, _ = policy.get_action(obs)

        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #14
class TestHerReplayBuffer:
    def setup_method(self):
        self.env = DummyDictEnv()
        self.obs = self.env.reset()
        self.replay_buffer = HerReplayBuffer(
            env_spec=self.env.spec,
            size_in_transitions=3,
            time_horizon=1,
            replay_k=0.4,
            reward_fun=self.env.compute_reward)

    def _add_single_transition(self):
        self.replay_buffer.add_transition(
            observation=self.obs,
            action=self.env.action_space.sample(),
            terminal=False,
            next_observation=self.obs)

    def _add_transitions(self):
        self.replay_buffer.add_transitions(
            observation=[self.obs],
            action=[self.env.action_space.sample()],
            terminal=[False],
            next_observation=[self.obs])

    def test_add_transition_dtype(self):
        self._add_single_transition()
        sample = self.replay_buffer.sample(1)

        assert sample['observation'].dtype == self.env.observation_space[
            'observation'].dtype
        assert sample['achieved_goal'].dtype == self.env.observation_space[
            'achieved_goal'].dtype
        assert sample['goal'].dtype == self.env.observation_space[
            'desired_goal'].dtype
        assert sample['action'].dtype == self.env.action_space.dtype

    def test_add_transitions_dtype(self):
        self._add_transitions()
        sample = self.replay_buffer.sample(1)

        assert sample['observation'].dtype == self.env.observation_space[
            'observation'].dtype
        assert sample['achieved_goal'].dtype == self.env.observation_space[
            'achieved_goal'].dtype
        assert sample['goal'].dtype == self.env.observation_space[
            'desired_goal'].dtype
        assert sample['action'].dtype == self.env.action_space.dtype

    def test_eviction_policy(self):
        self.replay_buffer.add_transitions(
            observation=[self.obs, self.obs],
            next_observation=[self.obs, self.obs],
            terminal=[False, False],
            action=[1, 2])
        assert not self.replay_buffer.full
        self.replay_buffer.add_transitions(
            observation=[self.obs, self.obs],
            next_observation=[self.obs, self.obs],
            terminal=[False, False],
            action=[3, 4])
        assert self.replay_buffer.full
        self.replay_buffer.add_transitions(
            observation=[self.obs, self.obs],
            next_observation=[self.obs, self.obs],
            terminal=[False, False],
            action=[5, 6])
        self.replay_buffer.add_transitions(
            observation=[self.obs, self.obs],
            next_observation=[self.obs, self.obs],
            terminal=[False, False],
            action=[7, 8])

        assert np.array_equal(self.replay_buffer._buffer['action'],
                              [[7], [8], [6]])
        assert self.replay_buffer.n_transitions_stored == 3

    def test_pickleable(self):
        self._add_transitions()
        replay_buffer_pickled = pickle.loads(pickle.dumps(self.replay_buffer))
        assert replay_buffer_pickled._buffer.keys(
        ) == self.replay_buffer._buffer.keys()
        for k in replay_buffer_pickled._buffer:
            assert replay_buffer_pickled._buffer[
                k].shape == self.replay_buffer._buffer[k].shape
        sample = self.replay_buffer.sample(1)
        sample2 = replay_buffer_pickled.sample(1)
        for k in self.replay_buffer._buffer:
            assert sample[k].shape == sample2[k].shape
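The expectation [[7], [8], [6]] in test_eviction_policy follows from a circular write pointer: with room for three transitions, the eight actions are written modulo the capacity, so the oldest slots are overwritten in place. A tiny sketch of that behaviour, assuming a plain ring buffer rather than garage's actual bookkeeping:

import numpy as np

capacity = 3
buf = np.zeros((capacity, 1))
write_idx = 0
for action in [1, 2, 3, 4, 5, 6, 7, 8]:
    buf[write_idx] = action
    write_idx = (write_idx + 1) % capacity

# The pointer wraps around twice, so only the three most recent writes
# survive, sitting in the slots the pointer last visited.
assert np.array_equal(buf, [[7], [8], [6]])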
Example #15
 def test_invalid_action_spaces(self):
     """Test that policy raises error if passed a dict obs space."""
     env = GymEnv(DummyDictEnv(act_space_type='box'))
     with pytest.raises(ValueError):
         qf = SimpleQFunction(env.spec)
         DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
class TestHerReplayBuffer:
    def setup_method(self):
        self.env = DummyDictEnv()
        self.obs = self.env.reset()
        self._replay_k = 4
        self.replay_buffer = HERReplayBuffer(env_spec=self.env.spec,
                                             capacity_in_transitions=10,
                                             replay_k=self._replay_k,
                                             reward_fn=self.env.compute_reward)

    def test_replay_k(self):
        self.replay_buffer = HERReplayBuffer(env_spec=self.env.spec,
                                             capacity_in_transitions=10,
                                             replay_k=0,
                                             reward_fn=self.env.compute_reward)

        with pytest.raises(ValueError):
            self.replay_buffer = HERReplayBuffer(
                env_spec=self.env.spec,
                capacity_in_transitions=10,
                replay_k=0.2,
                reward_fn=self.env.compute_reward)

    def _add_one_path(self):
        path = dict(
            observations=np.asarray([self.obs, self.obs]),
            actions=np.asarray([
                self.env.action_space.sample(),
                self.env.action_space.sample()
            ]),
            rewards=np.asarray([[1], [1]]),
            terminals=np.asarray([[False], [False]]),
            next_observations=np.asarray([self.obs, self.obs]),
        )
        self.replay_buffer.add_path(path)

    def test_add_path(self):
        self._add_one_path()

        # The HER buffer should add replay_k + 1 transitions for each
        # transition in the given path, except the last transition,
        # which is added only once.
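        # With replay_k = 4 and a path of length 2, that is (4 + 1) * 1 + 1 = 6.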

        path_len = 2
        total_expected_transitions = (self._replay_k + 1) * (path_len - 1) + 1
        assert (self.replay_buffer.n_transitions_stored ==
                total_expected_transitions)
        assert (len(
            self.replay_buffer._path_segments) == total_expected_transitions -
                1)
        # check that buffer has the correct keys
        assert {
            'observations', 'next_observations', 'actions', 'rewards',
            'terminals'
        } <= set(self.replay_buffer._buffer)

        # check that dict obses are flattened
        obs = self.replay_buffer._buffer['observations'][0]
        next_obs = self.replay_buffer._buffer['next_observations'][0]
        assert obs.shape == (self.env.spec.observation_space.flat_dim, )
        assert next_obs.shape == (self.env.spec.observation_space.flat_dim, )

    def test_pickleable(self):
        self._add_one_path()
        replay_buffer_pickled = pickle.loads(pickle.dumps(self.replay_buffer))
        assert (replay_buffer_pickled._buffer.keys() ==
                self.replay_buffer._buffer.keys())
        for k in replay_buffer_pickled._buffer:
            assert replay_buffer_pickled._buffer[
                k].shape == self.replay_buffer._buffer[k].shape
        sample = self.replay_buffer.sample_transitions(1)
        sample2 = replay_buffer_pickled.sample_transitions(1)
        for k in sample.keys():
            assert sample[k].shape == sample2[k].shape
        assert len(sample) == len(sample2)
Example #17
 def test_not_box(self):
     with pytest.raises(ValueError):
         dict_env = GarageEnv(DummyDictEnv())
         ContinuousCNNQFunction(env_spec=dict_env.spec,
                                filters=((5, (3, 3)), ),
                                strides=(1, ))