Example #1
class TestQfDerivedPolicy(TfGraphTestCase):
    def setUp(self):
        super().setUp()
        self.env = TfEnv(DummyDiscreteEnv())
        self.qf = SimpleQFunction(self.env.spec)
        self.policy = DiscreteQfDerivedPolicy(
            env_spec=self.env.spec, qf=self.qf)
        self.sess.run(tf.global_variables_initializer())
        self.env.reset()

    def test_discrete_qf_derived_policy(self):
        obs, _, _, _ = self.env.step(1)
        action = self.policy.get_action(obs)
        assert self.env.action_space.contains(action)
        actions = self.policy.get_actions([obs])
        for action in actions:
            assert self.env.action_space.contains(action)

    def test_is_pickleable(self):
        with tf.variable_scope('SimpleQFunction/SimpleMLPModel', reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        obs, _, _, _ = self.env.step(1)
        action1 = self.policy.get_action(obs)

        p = pickle.dumps(self.policy)
        with tf.Session(graph=tf.Graph()):
            policy_pickled = pickle.loads(p)
            action2 = policy_pickled.get_action(obs)
            assert action1 == action2
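For context, a minimal sketch of the imports and fixtures this test class presumably relies on; the module paths follow garage's test layout of that era and may differ in other releases:

import pickle

import tensorflow as tf

from garage.tf.envs import TfEnv
from garage.tf.policies import DiscreteQfDerivedPolicy
from tests.fixtures import TfGraphTestCase
from tests.fixtures.envs.dummy import DummyDiscreteEnv
from tests.fixtures.q_functions import SimpleQFunction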
Example #2
    def test_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.q_functions.'
                         'discrete_mlp_q_function.MLPModel'),
                        new=SimpleMLPModel):
            qf = DiscreteMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.variable_scope(
                'discrete_mlp_q_function/discrete_mlp_q_function', reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())

        output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})

        h_data = pickle.dumps(qf)
        with tf.Session(graph=tf.Graph()) as sess:
            qf_pickled = pickle.loads(h_data)
            input_var = tf.placeholder(tf.float32, shape=(None, ) + obs_dim)

            q_vals = qf_pickled.get_qval_sym(input_var, 'another')
            output2 = sess.run(q_vals, feed_dict={input_var: [obs]})

        assert np.array_equal(output1, output2)
Example #3
    def test_get_action(self, mock_rand, obs_dim, action_dim, filter_dims,
                        filter_sizes, strides, padding, hidden_sizes):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.MLPModel'),
                        new=SimpleMLPModel):
            with mock.patch(('garage.tf.policies.'
                             'categorical_cnn_policy.CNNModel'),
                            new=SimpleCNNModel):
                policy = CategoricalCNNPolicy(env_spec=env.spec,
                                              conv_filters=filter_dims,
                                              conv_filter_sizes=filter_sizes,
                                              conv_strides=strides,
                                              conv_pad=padding,
                                              hidden_sizes=hidden_sizes)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, prob = policy.get_action(obs)
        expected_prob = np.full(action_dim, 0.5)

        assert env.action_space.contains(action)
        assert action == 0
        assert np.array_equal(prob['prob'], expected_prob)

        actions, probs = policy.get_actions([obs, obs, obs])
        for action, prob in zip(actions, probs['prob']):
            assert env.action_space.contains(action)
            assert action == 0
            assert np.array_equal(prob, expected_prob)
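The extra arguments in the signature above suggest the test is driven by pytest parametrization with numpy's RNG patched; a hypothetical decorator stack is sketched below (the parameter values are illustrative, not taken from the source):

    @pytest.mark.parametrize(
        'obs_dim, action_dim, filter_dims, filter_sizes, strides, padding, '
        'hidden_sizes', [
            ((1, 1, 1), 1, (32, ), (3, ), (1, ), 'SAME', (4, )),
        ])
    @mock.patch('numpy.random.rand')
    def test_get_action(self, mock_rand, obs_dim, action_dim, filter_dims,
                        filter_sizes, strides, padding, hidden_sizes):
        ...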
Example #4
    def test_is_pickleable(self, mock_rand, obs_dim, action_dim):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.MLPModel'),
                        new=SimpleMLPModel):
            with mock.patch(('garage.tf.policies.'
                             'categorical_cnn_policy.CNNModel'),
                            new=SimpleCNNModel):
                policy = CategoricalCNNPolicy(env_spec=env.spec,
                                              conv_filters=(32, ),
                                              conv_filter_sizes=(3, ),
                                              conv_strides=(1, ),
                                              conv_pad='SAME',
                                              hidden_sizes=(4, ))
        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.compat.v1.variable_scope(
                'CategoricalCNNPolicy/Sequential/MLPModel', reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(policy.model.outputs,
                                feed_dict={policy.model.input: [obs]})
        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(policy_pickled.model.outputs,
                               feed_dict={policy_pickled.model.input: [obs]})
            assert np.array_equal(output1, output2)
Example #5
    def test_dist_info_sym(self, obs_dim, action_dim, filter_dims,
                           filter_sizes, strides, padding, hidden_sizes):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.MLPModel'),
                        new=SimpleMLPModel):
            with mock.patch(('garage.tf.policies.'
                             'categorical_cnn_policy.CNNModel'),
                            new=SimpleCNNModel):
                policy = CategoricalCNNPolicy(env_spec=env.spec,
                                              conv_filters=filter_dims,
                                              conv_filter_sizes=filter_sizes,
                                              conv_strides=strides,
                                              conv_pad=padding,
                                              hidden_sizes=hidden_sizes)

        env.reset()
        obs, _, _, _ = env.step(1)

        expected_prob = np.full(action_dim, 0.5)

        obs_dim = env.spec.observation_space.shape
        state_input = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, ) + obs_dim)
        dist1 = policy.dist_info_sym(state_input, name='policy2')

        prob = self.sess.run(dist1['prob'], feed_dict={state_input: [obs]})
        assert np.array_equal(prob[0], expected_prob)
Example #6
    def test_is_pickleable(self, obs_dim, action_dim):
        """Test if ContinuousMLPPolicy is pickleable"""
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'continuous_mlp_policy.MLPModel'),
                        new=SimpleMLPModel):
            policy = ContinuousMLPPolicy(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.compat.v1.variable_scope('ContinuousMLPPolicy/MLPModel',
                                         reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            policy.model.outputs,
            feed_dict={policy.model.input: [obs.flatten()]})

        p = pickle.dumps(policy)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(
                policy_pickled.model.outputs,
                feed_dict={policy_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Example #7
    def test_is_pickleable(self):
        env = TfEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))
        with mock.patch(('garage.tf.policies.'
                         'gaussian_lstm_policy_with_model.GaussianLSTMModel'),
                        new=SimpleGaussianLSTMModel):
            policy = GaussianLSTMPolicyWithModel(
                env_spec=env.spec, state_include_action=False)

        env.reset()
        obs = env.reset()

        with tf.variable_scope(
                'GaussianLSTMPolicyWithModel/GaussianLSTMModel', reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())

        output1 = self.sess.run(
            policy.model.networks['default'].mean,
            feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})

        p = pickle.dumps(policy)

        with tf.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(
                policy_pickled.model.networks['default'].mean,
                feed_dict={
                    policy_pickled.model.input: [[obs.flatten()],
                                                 [obs.flatten()]]
                })
            assert np.array_equal(output1, output2)
Example #8
class TestResize(unittest.TestCase):
    @overrides
    def setUp(self):
        self.width = 16
        self.height = 16
        self.env = TfEnv(DummyDiscrete2DEnv())
        self.env_r = TfEnv(
            Resize(DummyDiscrete2DEnv(), width=self.width, height=self.height))

    def test_resize_invalid_environment_type(self):
        with self.assertRaises(ValueError):
            self.env.observation_space = Discrete(64)
            Resize(self.env, width=self.width, height=self.height)

    def test_resize_invalid_environment_shape(self):
        with self.assertRaises(ValueError):
            self.env.observation_space = Box(low=0,
                                             high=255,
                                             shape=(4, ),
                                             dtype=np.uint8)
            Resize(self.env, width=self.width, height=self.height)

    def test_resize_output_observation_space(self):
        assert self.env_r.observation_space.shape == (self.width, self.height)

    def test_resize_output_reset(self):
        assert self.env_r.reset().shape == (self.width, self.height)

    def test_resize_output_step(self):
        self.env_r.reset()
        obs_r, _, _, _ = self.env_r.step(1)
        assert obs_r.shape == (self.width, self.height)
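A sketch of the imports this unittest-based class presumably needs; the exact module paths, in particular the location of the overrides helper, are assumptions:

import unittest

import numpy as np
from gym.spaces import Box, Discrete

from garage.envs.wrappers import Resize
from garage.misc.overrides import overrides
from garage.tf.envs import TfEnv
from tests.fixtures.envs.dummy import DummyDiscrete2DEnv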
Example #9
    def test_dist_info(self, obs_dim, embedding_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        with mock.patch(('garage.tf.embeddings.'
                         'gaussian_mlp_encoder.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                       output_space=env.spec.action_space)
            embedding = GaussianMLPEncoder(embedding_spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        obs_dim = env.spec.observation_space.flat_dim
        obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, obs_dim))

        dist1_sym = embedding.dist_info_sym(obs_ph, name='p1_sym')

        # expected values over the flattened output
        expected_mean = [np.full(np.prod(embedding_dim), 0.5)]
        expected_log_std = [np.full(np.prod(embedding_dim), np.log(0.5))]

        prob0 = embedding.dist_info(obs.flatten())
        prob1 = self.sess.run(dist1_sym, feed_dict={obs_ph: [obs.flatten()]})

        assert np.array_equal(prob0['mean'].flatten(), expected_mean[0])
        assert np.array_equal(prob0['log_std'].flatten(), expected_log_std[0])
        assert np.array_equal(prob1['mean'], expected_mean)
        assert np.array_equal(prob1['log_std'], expected_log_std)
Example #10
    def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        expected_prob = np.full(action_dim, 0.5)

        p = pickle.dumps(policy)

        with tf.Session(graph=tf.Graph()):
            policy_pickled = pickle.loads(p)
            action, prob = policy_pickled.get_action(obs)
            assert env.action_space.contains(action)
            assert action == 0
            assert np.array_equal(prob['prob'], expected_prob)

            prob1 = policy.dist_info([obs.flatten()])
            prob2 = policy_pickled.dist_info([obs.flatten()])
            assert np.array_equal(prob1['prob'], prob2['prob'])
            assert np.array_equal(prob2['prob'][0], expected_prob)
Example #11
    def test_get_action(self, mock_rand, filter_dims, num_filters, strides,
                        padding, hidden_sizes):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy2(env_spec=env.spec,
                                       filter_dims=filter_dims,
                                       num_filters=num_filters,
                                       strides=strides,
                                       padding=padding,
                                       hidden_sizes=hidden_sizes)
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, ) +
                                           env.observation_space.shape,
                                           name='obs')
        policy.build(obs_var)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #12
    def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        env.reset()
        obs, _, _, _ = env.step(1)
        latent = np.random.random((latent_dim, ))
        task = np.zeros(task_num)
        task[0] = 1

        action1, _ = policy.get_action_given_latent(obs, latent)
        action2, _ = policy.get_action_given_task(obs, task)
        action3, _ = policy.get_action(np.concatenate([obs.flatten(), task]))

        assert env.action_space.contains(action1)
        assert env.action_space.contains(action2)
        assert env.action_space.contains(action3)

        obses, latents, tasks = [obs] * 3, [latent] * 3, [task] * 3
        aug_obses = [np.concatenate([obs.flatten(), task])] * 3
        action1n, _ = policy.get_actions_given_latents(obses, latents)
        action2n, _ = policy.get_actions_given_tasks(obses, tasks)
        action3n, _ = policy.get_actions(aug_obses)

        for action in chain(action1n, action2n, action3n):
            assert env.action_space.contains(action)
Example #13
    def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.variable_scope('CategoricalMLPPolicy/MLPModel', reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            policy.model.outputs,
            feed_dict={policy.model.input: [obs.flatten()]})

        p = pickle.dumps(policy)

        with tf.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(
                policy_pickled.model.outputs,
                feed_dict={policy_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Example #14
    def test_is_pickleable(self, obs_dim, embedding_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        with mock.patch(('garage.tf.embeddings.'
                         'gaussian_mlp_encoder.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                       output_space=env.spec.action_space)
            embedding = GaussianMLPEncoder(embedding_spec)

        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim

        with tf.compat.v1.variable_scope('GaussianMLPEncoder/GaussianMLPModel',
                                         reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            embedding.model.outputs[:-1],
            feed_dict={embedding.model.input: [obs.flatten()]})

        p = pickle.dumps(embedding)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            embedding_pickled = pickle.loads(p)
            output2 = sess.run(
                embedding_pickled.model.outputs[:-1],
                feed_dict={embedding_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Example #15
    def test_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.q_functions.'
                         'continuous_mlp_q_function.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs = obs.flatten()
        act = np.full(action_dim, 0.5).flatten()

        with tf.compat.v1.variable_scope(
                'ContinuousMLPQFunction/SimpleMLPMergeModel', reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())

        output1 = qf.get_qval([obs], [act])

        h_data = pickle.dumps(qf)
        with tf.compat.v1.Session(graph=tf.Graph()):
            qf_pickled = pickle.loads(h_data)
            output2 = qf_pickled.get_qval([obs], [act])

        assert np.array_equal(output1, output2)
Example #16
    def test_get_qval_sym(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.q_functions.'
                         'continuous_mlp_q_function.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs = obs.flatten()
        act = np.full(action_dim, 0.5).flatten()

        output1 = qf.get_qval([obs], [act])

        input_var1 = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, obs.shape[0]))
        input_var2 = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, act.shape[0]))
        q_vals = qf.get_qval_sym(input_var1, input_var2, 'another')
        output2 = self.sess.run(q_vals,
                                feed_dict={
                                    input_var1: [obs],
                                    input_var2: [act]
                                })

        expected_output = np.full((1, ), 0.5)

        assert np.array_equal(output1, output2)
        assert np.array_equal(output2[0], expected_output)
Example #17
    def test_get_action(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'gaussian_mlp_policy_with_model.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            policy = GaussianMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, prob = policy.get_action(obs)

        expected_action = np.full(action_dim, 0.75)
        expected_mean = np.full(action_dim, 0.5)
        expected_log_std = np.full(action_dim, 0.5)

        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)
        assert np.array_equal(prob['mean'], expected_mean)
        assert np.array_equal(prob['log_std'], expected_log_std)

        actions, probs = policy.get_actions([obs, obs, obs])
        for action, mean, log_std in zip(actions, probs['mean'],
                                         probs['log_std']):
            assert env.action_space.contains(action)
            assert np.array_equal(action, expected_action)
            assert np.array_equal(mean, expected_mean)
            assert np.array_equal(log_std, expected_log_std)
Example #18
    def test_is_pickleable(self):
        env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
        with mock.patch(('garage.tf.policies.'
                         'categorical_gru_policy_with_model.GRUModel'),
                        new=SimpleGRUModel):
            policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                                   state_include_action=False)

        env.reset()
        obs = env.reset()

        with tf.variable_scope('CategoricalGRUPolicyWithModel/prob_network',
                               reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())

        output1 = self.sess.run(
            policy.model.outputs[0],
            feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})

        p = pickle.dumps(policy)

        with tf.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(policy_pickled.model.outputs[0],
                               feed_dict={
                                   policy_pickled.model.input:
                                   [[obs.flatten()], [obs.flatten()]]
                               })
            assert np.array_equal(output1, output2)
Example #19
    def test_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'gaussian_mlp_policy_with_model.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            policy = GaussianMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim

        with tf.variable_scope('GaussianMLPPolicyWithModel/GaussianMLPModel',
                               reuse=True):
            return_var = tf.get_variable('return_var')
        # set it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            policy.model.outputs[:-1],
            feed_dict={policy.model.input: [obs.flatten()]})

        p = pickle.dumps(policy)
        with tf.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(
                policy_pickled.model.outputs[:-1],
                feed_dict={policy_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Example #20
    def test_output_shape(self, obs_dim, action_dim):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.q_functions.'
                         'discrete_mlp_q_function.MLPModel'),
                        new=SimpleMLPModel):
            qf = DiscreteMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)

        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
        assert outputs.shape == (1, action_dim)
Example #21
    def test_dist_info(self, obs_dim, action_dim):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'categorical_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        expected_prob = np.full(action_dim, 0.5)

        policy_probs = policy.dist_info([obs.flatten()])
        assert np.array_equal(policy_probs['prob'][0], expected_prob)
Example #22
    def test_output_shape(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.q_functions.'
                         'continuous_mlp_q_function.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousMLPQFunction(env_spec=env.spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs = obs.flatten()
        act = np.full(action_dim, 0.5).flatten()

        outputs = qf.get_qval([obs], [act])

        assert outputs.shape == (1, 1)
Example #23
    def run_task(self, snapshot_config, *_):
        config = tf.ConfigProto(device_count={'GPU': 0},
                                allow_soft_placement=True,
                                intra_op_parallelism_threads=12,
                                inter_op_parallelism_threads=12)
        sess = tf.Session(config=config)
        with LocalTFRunner(snapshot_config=snapshot_config,
                           sess=sess) as runner:
            env = gym.make(self._env)
            env = TfEnv(normalize(env))
            env.reset()
            policy = GaussianMLPPolicy(
                env_spec=env.spec,
                hidden_sizes=(32, 32),
                hidden_nonlinearity=tf.nn.tanh,
                output_nonlinearity=None,
            )

            baseline = GaussianMLPBaseline(
                env_spec=env.spec,
                regressor_args=dict(
                    hidden_sizes=(64, 64),
                    use_trust_region=False,
                    optimizer=FirstOrderOptimizer,
                    optimizer_args=dict(
                        batch_size=32,
                        max_epochs=10,
                        tf_optimizer_args=dict(learning_rate=1e-3),
                    ),
                ),
            )

            algo = PPO(
                env_spec=env.spec,
                policy=policy,
                baseline=baseline,
                max_path_length=100,
                discount=0.99,
                gae_lambda=0.95,
                lr_clip_range=0.2,
                policy_ent_coeff=0.0,
                optimizer_args=dict(
                    batch_size=32,
                    max_epochs=10,
                    tf_optimizer_args=dict(learning_rate=1e-3),
                ),
            )
            runner.setup(algo, env, sampler_args=dict(n_envs=12))
            runner.train(n_epochs=5, batch_size=2048)
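Assuming run_task above is bound to an instance of the surrounding benchmark class, launching it through garage's experiment entry point might look like the sketch below; run_experiment and the chosen arguments are illustrative, not taken from the source:

from garage.experiment import run_experiment

run_experiment(run_task, snapshot_mode='last', seed=1)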
Example #24
    def test_get_embedding(self, obs_dim, embedding_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)
        task_input = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, None,
                                                     embedding.input_dim))
        embedding.build(task_input)

        env.reset()
        obs, _, _, _ = env.step(1)

        latent, _ = embedding.forward(obs)
        assert env.action_space.contains(latent)
Example #25
    def test_is_pickleable(self):
        env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, None, env.observation_space.flat_dim],
            name='obs')
        policy = CategoricalLSTMPolicy2(env_spec=env.spec,
                                        state_include_action=False)

        policy.build(obs_var)
        policy.reset()
        obs = env.reset()

        policy.model._lstm_cell.weights[0].load(
            tf.ones_like(policy.model._lstm_cell.weights[0]).eval())

        output1 = self.sess.run(
            [policy.distribution.logits],
            feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})

        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            obs_var = tf.compat.v1.placeholder(
                tf.float32,
                shape=[None, None, env.observation_space.flat_dim],
                name='obs')
            policy_pickled.build(obs_var)
            output2 = sess.run([policy_pickled.distribution.logits],
                               feed_dict={
                                   policy_pickled.model.input:
                                   [[obs.flatten()], [obs.flatten()]]
                               })  # noqa: E126
            assert np.array_equal(output1, output2)
Example #26
    def test_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, env.observation_space.flat_dim],
            name='obs')
        policy = CategoricalMLPPolicy2(env_spec=env.spec)

        policy.build(obs_var)
        obs = env.reset()

        with tf.compat.v1.variable_scope(
                'CategoricalMLPPolicy/CategoricalMLPModel', reuse=True):
            bias = tf.compat.v1.get_variable('mlp/hidden_0/bias')
        # set it to all ones
        bias.load(tf.ones_like(bias).eval())
        output1 = self.sess.run(
            [policy.distribution.probs],
            feed_dict={policy.model.input: [obs.flatten()]})

        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            obs_var = tf.compat.v1.placeholder(
                tf.float32,
                shape=[None, env.observation_space.flat_dim],
                name='obs')
            policy_pickled.build(obs_var)
            output2 = sess.run(
                [policy_pickled.distribution.probs],
                feed_dict={policy_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Example #27
    def test_get_qval_max_pooling(self, filter_dims, num_filters, strides,
                                  pool_strides, pool_shapes):
        env = TfEnv(DummyDiscretePixelEnv())
        obs = env.reset()

        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModelWithMaxPooling'),
                        new=SimpleCNNModelWithMaxPooling):
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filter_dims=filter_dims,
                                            num_filters=num_filters,
                                            strides=strides,
                                            max_pooling=True,
                                            pool_strides=pool_strides,
                                            pool_shapes=pool_shapes)

        action_dim = env.action_space.shape

        obs, _, _, _ = env.step(1)

        act = np.full(action_dim, 0.5)
        expected_output = np.full((1, ), 0.5)

        outputs = qf.get_qval([obs], [act])

        assert np.array_equal(outputs[0], expected_output)

        outputs = qf.get_qval([obs, obs, obs], [act, act, act])

        for output in outputs:
            assert np.array_equal(output, expected_output)
Example #28
    def test_dist_info_sym_wrong_input(self):
        env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))

        obs_ph = tf.placeholder(tf.float32,
                                shape=(None, None,
                                       env.observation_space.flat_dim))

        with mock.patch(('garage.tf.policies.'
                         'categorical_gru_policy_with_model.GRUModel'),
                        new=SimpleGRUModel):
            policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                                   state_include_action=True)

        policy.reset()
        obs = env.reset()

        policy.dist_info_sym(
            obs_var=obs_ph,
            state_info_vars={'prev_action': np.zeros((3, 1, 1))},
            name='p2_sym')
        # observation batch size = 2 but prev_action batch size = 3
        with pytest.raises(tf.errors.InvalidArgumentError):
            self.sess.run(
                policy.model.networks['p2_sym'].input,
                feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})
Example #29
    def test_get_action(self, mock_rand, obs_dim, action_dim, hidden_dim):
        mock_rand.return_value = 0

        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))

        with mock.patch(('garage.tf.policies.'
                         'categorical_gru_policy_with_model.GRUModel'),
                        new=SimpleGRUModel):
            policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                                   state_include_action=False)

        policy.reset()
        obs = env.reset()

        expected_prob = np.full(action_dim, 0.5)

        action, agent_info = policy.get_action(obs)
        assert env.action_space.contains(action)
        assert action == 0
        assert np.array_equal(agent_info['prob'], expected_prob)

        actions, agent_infos = policy.get_actions([obs])
        for action, prob in zip(actions, agent_infos['prob']):
            assert env.action_space.contains(action)
            assert action == 0
            assert np.array_equal(prob, expected_prob)
Example #30
    def test_dist_info_sym_include_action(self, obs_dim, action_dim,
                                          hidden_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))

        obs_ph = tf.placeholder(
            tf.float32, shape=(None, None, env.observation_space.flat_dim))

        with mock.patch(('garage.tf.policies.'
                         'gaussian_gru_policy_with_model.GaussianGRUModel'),
                        new=SimpleGaussianGRUModel):
            policy = GaussianGRUPolicyWithModel(
                env_spec=env.spec, state_include_action=True)

            policy.reset()
            obs = env.reset()
            dist_sym = policy.dist_info_sym(
                obs_var=obs_ph,
                state_info_vars={'prev_action': np.zeros((2, 1) + action_dim)},
                name='p2_sym')
        dist = self.sess.run(
            dist_sym, feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})

        assert np.array_equal(dist['mean'], np.full((2, 1) + action_dim, 0.5))
        assert np.array_equal(dist['log_std'], np.full((2, 1) + action_dim,
                                                       0.5))