Example No. 1
    def test_continuous_mlp_policy(self):
        continuous_mlp_policy = ContinuousMLPPolicy(env_spec=self.env,
                                                    hidden_sizes=(1, ))
        self.sess.run(tf.compat.v1.global_variables_initializer())

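        # The policy should still return an action for an extreme observation
        # (the upper bound of the observation space).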
        obs = self.env.observation_space.high
        assert continuous_mlp_policy.get_action(obs)

Example No. 2
    def test_get_action(self, obs_dim, action_dim):
        """Test get_action method"""
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        policy = ContinuousMLPPolicy(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs.flatten())

        assert env.action_space.contains(action)

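        # get_actions should handle a batch of observations as well.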
        actions, _ = policy.get_actions(
            [obs.flatten(), obs.flatten(),
             obs.flatten()])
        for action in actions:
            assert env.action_space.contains(action)

Example No. 3
    def test_get_action(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
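        # Swap the underlying MLPModel for SimpleMLPModel, a test stub with a
        # constant output, so the expected action (0.5 per dimension) is known.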
        with mock.patch(('garage.tf.policies.'
                         'continuous_mlp_policy.MLPModel'),
                        new=SimpleMLPModel):
            policy = ContinuousMLPPolicy(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs)

        expected_action = np.full(action_dim, 0.5)

        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
            assert np.array_equal(action, expected_action)
Example No. 4
    def test_get_action(self, obs_dim, action_dim, obs_type):
        """Test get_action method"""
        assert obs_type in ['box', 'dict']
        if obs_type == 'box':
            env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        else:
            env = GymEnv(
                DummyDictEnv(obs_space_type='box', act_space_type='box'))

        policy = ContinuousMLPPolicy(env_spec=env.spec)

        env.reset()
        obs = env.step(1).observation
        if obs_type == 'box':
            obs = obs.flatten()

        action, _ = policy.get_action(obs)

        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example No. 5
def osimArm(ctxt=None, seed=1):
    """Train TRPO with CartPole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:

        env = GarageEnv(Arm2DVecEnv())
        env.reset()

        policy = ContinuousMLPPolicy(env_spec=env.spec,
                                     hidden_sizes=[64, 64],
                                     hidden_nonlinearity=tf.nn.relu,
                                     output_nonlinearity=tf.nn.tanh)

        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=tf.nn.relu)

        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    max_path_length=200,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=20,
                    target_update_tau=1e-2,
                    n_train_steps=50,
                    discount=0.9,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

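        # Render a short rollout with the still-untrained policy before
        # runner.setup()/runner.train() are called.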
        env.render()
        obs, _, _, _ = env.step(env.action_space.sample())
        steps = 0
        n_steps = 1000

        while True:
            if steps == n_steps:
                env.close()
                break
            action, _ = policy.get_action(obs)
            obs, _, _, _ = env.step(action)
            env.render()
            steps += 1

        runner.setup(algo=ddpg, env=env)

        runner.train(n_epochs=500, batch_size=100)
Example No. 6
class TestContinuousMLPPolicyWithModelTransit(TfGraphTestCase):
    def setup_method(self):
        with mock.patch('tensorflow.random.normal') as mock_rand:
            mock_rand.return_value = 0.5
            super().setup_method()
            self.box_env = TfEnv(DummyBoxEnv())
            self.policy1 = ContinuousMLPPolicy(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P1')
            self.policy2 = ContinuousMLPPolicy(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P2')
            self.policy3 = ContinuousMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P3')
            self.policy4 = ContinuousMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P4')

            self.sess.run(tf.compat.v1.global_variables_initializer())
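            # Copy parameters from policy1/policy2 so the *WithModel variants
            # compute identical outputs.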
            for a, b in zip(self.policy3.get_params(),
                            self.policy1.get_params()):
                self.sess.run(a.assign(b))
            for a, b in zip(self.policy4.get_params(),
                            self.policy2.get_params()):
                self.sess.run(a.assign(b))

            self.obs = self.box_env.reset()
            self.action_bound = self.box_env.action_space.high
            assert self.policy1.vectorized == self.policy2.vectorized
            assert self.policy3.vectorized == self.policy4.vectorized

    @mock.patch('numpy.random.normal')
    def test_get_action(self, mock_rand):
        mock_rand.return_value = 0.5
        action1, _ = self.policy1.get_action(self.obs)
        action2, _ = self.policy2.get_action(self.obs)
        action3, _ = self.policy3.get_action(self.obs)
        action4, _ = self.policy4.get_action(self.obs)

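        # Rescaling the *WithModel actions by the action bound should
        # reproduce the original policies' outputs.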
        assert np.array_equal(action1, action3 * self.action_bound)
        assert np.array_equal(action2, action4 * self.action_bound)

        actions1, _ = self.policy1.get_actions([self.obs, self.obs])
        actions2, _ = self.policy2.get_actions([self.obs, self.obs])
        actions3, _ = self.policy3.get_actions([self.obs, self.obs])
        actions4, _ = self.policy4.get_actions([self.obs, self.obs])

        assert np.array_equal(actions1, actions3 * self.action_bound)
        assert np.array_equal(actions2, actions4 * self.action_bound)

    def test_get_action_sym(self):
        obs_dim = self.box_env.spec.observation_space.flat_dim
        state_input = tf.compat.v1.placeholder(
            tf.float32, shape=(None, obs_dim))

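        # get_action_sym builds the symbolic action tensor for observations
        # fed through the placeholder.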
        action_sym1 = self.policy1.get_action_sym(
            state_input, name='action_sym')
        action_sym2 = self.policy2.get_action_sym(
            state_input, name='action_sym')
        action_sym3 = self.policy3.get_action_sym(
            state_input, name='action_sym')
        action_sym4 = self.policy4.get_action_sym(
            state_input, name='action_sym')

        action1 = self.sess.run(
            action_sym1, feed_dict={state_input: [self.obs]})
        action2 = self.sess.run(
            action_sym2, feed_dict={state_input: [self.obs]})
        action3 = self.sess.run(
            action_sym3, feed_dict={state_input: [self.obs]})
        action4 = self.sess.run(
            action_sym4, feed_dict={state_input: [self.obs]})

        assert np.array_equal(action1, action3 * self.action_bound)
        assert np.array_equal(action2, action4 * self.action_bound)
Example No. 7
    def test_continuous_mlp_policy(self):
        continuous_mlp_policy = ContinuousMLPPolicy(env_spec=self.env,
                                                    hidden_sizes=(1, ))
        obs = self.env.observation_space.high
        assert continuous_mlp_policy.get_action(obs)