示例#1
0
    def test_likelihood(self):
        """Check likelihood and log-likelihood computation.

        The network output is the input placeholder itself, and the reference
        densities below use the fed input as the Gaussian mean.  For 100
        random draws of mean, log standard deviation and action, the values
        evaluated from ``likelihood_op`` / ``log_likelihood_op`` are compared
        with the product (resp. sum) of independent per-dimension densities
        computed with ``scipy.stats``.
        """
        action_dim = 2
        net_in = tf.placeholder(
            tf.float64, shape=(None, action_dim), name='input')
        log_std = tf.placeholder(tf.float64, shape=(), name='log_std')
        actions = tf.placeholder(
            tf.float64, shape=(None, action_dim), name='actions')
        net_out = net_in
        policy = policies.GaussianPolicy(net_in, net_out, action_dim, log_std)

        # Build both ops once and feed the actions through a placeholder.
        # Constructing them from a fresh NumPy array on every iteration would
        # add new nodes to the graph each time round the loop.
        lik_op = policy.likelihood_op(actions)
        log_lik_op = policy.log_likelihood_op(actions)

        with self.session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            for _ in range(100):
                test_in = np.random.randn(1, action_dim)
                log_std_in = np.random.randn()
                test_actions = np.random.randn(1, action_dim)

                # One run call evaluates both quantities on the same feed.
                lik, log_lik = sess.run([lik_op, log_lik_op], {
                    net_in: test_in,
                    log_std: log_std_in,
                    actions: test_actions
                })

                # scipy expects a variance, hence the squared std.
                variance = np.exp(log_std_in)**2

                # Likelihood: product of the per-dimension densities.
                lik_test = [
                    scipy.stats.multivariate_normal.pdf(
                        test_actions[0, dim],
                        mean=test_in[0, dim],
                        cov=variance) for dim in range(action_dim)
                ]
                self.assertAllClose(np.prod(lik_test), lik[0, 0])

                # Log likelihood: sum of the per-dimension log densities.
                log_lik_test = [
                    scipy.stats.multivariate_normal.logpdf(
                        test_actions[0, dim],
                        mean=test_in[0, dim],
                        cov=variance) for dim in range(action_dim)
                ]
                self.assertAllClose(np.sum(log_lik_test), log_lik[0, 0])
示例#2
0
    def test_sample(self):
        """Verify whether samples of policy are approximately Gaussian.

        A single random mean and a log standard deviation drawn uniformly
        from [-1, 0.5) are used.  Samples are obtained in three ways
        (``policy.sample`` and two variants of ``policy.sample_op``) and each
        set is checked for normality, for its sample mean and, where the
        result is still two-dimensional, for its shape.
        """
        num_samples = 10000
        net_in = tf.placeholder(tf.float64, shape=(None, 1), name='input')
        log_std = tf.placeholder(tf.float64, shape=(), name='log_std')
        net_out = net_in
        policy = policies.GaussianPolicy(net_in, net_out, 1, log_std)
        with self.session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            test_in = np.array([[
                np.random.randn(),
            ]])
            test_log_std = np.random.uniform(-1, 0.5)
            test_std = np.exp(test_log_std)
            test_in_mult = np.repeat(test_in, num_samples, axis=0)
            # Test various ways to create samples
            samples_1 = policy.sample(test_in_mult, sess,
                                      {log_std: test_log_std})
            samples_2 = sess.run(policy.sample_op()[0], {
                net_in: test_in_mult,
                log_std: test_log_std
            })
            # NOTE(review): presumably sample_op(n) draws n samples for the
            # single fed input row -- confirm against GaussianPolicy.
            samples_3 = sess.run(
                policy.sample_op(num_samples)[0], {
                    net_in: test_in,
                    log_std: test_log_std
                }).ravel()

            # Check if approximately normally distributed.  Standardise with
            # the standard deviation exp(log_std); the log itself can be
            # arbitrarily close to zero and is not a scale parameter.
            samples = [samples_1, samples_2, samples_3]
            pvals = [
                scipy.stats.normaltest((s - test_in[0, 0]) / test_std)[1]
                for s in samples
            ]
            means = [s.mean() for s in samples]
            for pval in pvals:
                self.assertGreater(pval, 1e-4)

            # Verify shapes (samples_3 was flattened, so it is skipped here)
            for batch in samples[:2]:
                self.assertEqual(batch.shape[0], test_in_mult.shape[0])
                self.assertEqual(batch.shape[1], test_in_mult.shape[1])

            # Check if sample mean corresponds to actual mean
            for mean in means:
                self.assertAlmostEqual(mean, test_in[0, 0], places=1)
 def test_mean_std(self):
   """Sanity-check the mean, std and log-std tensors of the policy.

   The policy's network output is the input placeholder itself.  For ten
   random (input, log-std) pairs the evaluated mean must equal the fed
   input, the evaluated log-std must equal the fed log-std, and the
   evaluated std must equal its exponential.
   """
   inputs = tf.placeholder(tf.float64, shape=(None, 1), name='input')
   log_std = tf.placeholder(tf.float64, shape=(), name='log_std')
   # Identity "network": the same tensor serves as input and output.
   policy = policies.GaussianPolicy(inputs, inputs, 1, log_std)
   with self.session() as sess:
     sess.run(tf.global_variables_initializer())
     for _ in range(10):
       fed_input = np.random.randn(1, 1)
       fed_log_std = np.random.randn()
       feed = {inputs: fed_input, log_std: fed_log_std}
       results = sess.run(policy.mean_std_log_std_op(), feed)
       mean, std, log_std_out = results
       self.assertAlmostEqual(mean[0, 0], fed_input[0, 0])
       self.assertAlmostEqual(log_std_out, fed_log_std)
       self.assertAlmostEqual(std, np.exp(fed_log_std))
示例#4
0
    def test_learn_simple_policy(self):
        """Fit a Gaussian "policy" that maps four input clusters to targets.

        Training inputs are the four 2D cluster centres, each repeated 100
        times and perturbed with zero-mean Gaussian noise; every centre has
        its own one-dimensional target output.  After 1000 Adam steps on the
        negative mean log-likelihood, the policy mean evaluated at the
        noise-free centres must match the targets with a mean absolute error
        that is zero to one decimal place.
        """
        centres = np.array([[-1., -1], [-1, 1], [1, -1], [1, 1]])
        noise_std = .1
        targets = np.array([[0.], [1], [2], [3]])

        # Two hidden layers of 64 ReLU units mapping 2 inputs to 1 output.
        hidden_sizes = (64, 64)
        generator = networks.FullyConnectedNetworkGenerator(
            2, 1, hidden_sizes, tf.nn.relu)
        weights = generator.construct_network_weights()
        net_in = tf.placeholder(tf.float32, shape=(None, 2), name='input')
        net_out = generator.construct_network(net_in, weights)
        policy = policies.GaussianPolicy(net_in, net_out, 1, -5.)

        actions = tf.placeholder(tf.float32, shape=(None, 1), name='actions')
        # Maximise the likelihood by minimising its negated mean.
        loss = -tf.reduce_mean(policy.log_likelihood_op(actions))
        train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

        mean_tensor, _ = policy.mean_op()

        with self.session() as sess:
            sess.run(tf.global_variables_initializer())
            for _ in range(1000):
                clean_batch = np.repeat(centres, 100, axis=0)
                noise = np.random.normal(0, noise_std, clean_batch.shape)
                batch_inputs = clean_batch + noise
                batch_targets = np.repeat(targets, 100, axis=0)
                feed = {net_in: batch_inputs, actions: batch_targets}
                sess.run(train_step, feed)
            predicted = sess.run(mean_tensor, {net_in: centres})
            mae = np.mean(np.abs(targets - predicted))
            self.assertAlmostEqual(mae, 0, places=1)