Python numerical_diff_policy示例

编程语言: Python

命名空间/包名称: mushroom_rl.utils.numerical_gradient

方法/功能: numerical_diff_policy

hotexamples.com的示例: 2

Python numerical_diff_policy - 已找到2个示例。以下是从开源项目中提取的、评价最高的 mushroom_rl.utils.numerical_gradient.numerical_diff_policy 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

def test_multivariate_state_std_gaussian():
    """Compare ``pi.diff`` with ``numerical_diff_policy`` on a StateStdGaussianPolicy.

    The policy is built from two linear regressors (one passed as the mean
    approximator, one as the std approximator) mapping 5 inputs to 3
    outputs. For each of 20 randomly drawn states an action is sampled and
    the two derivative computations must agree under ``np.allclose``.
    """
    # Fixed seed so the weights, states and sampled actions are reproducible.
    np.random.seed(88)
    state_dim, action_dim = 5, 3
    shapes = dict(input_shape=(state_dim, ), output_shape=(action_dim, ))

    mean_model = Regressor(LinearApproximator, **shapes)
    std_model = Regressor(LinearApproximator, **shapes)
    pi = StateStdGaussianPolicy(mean_model, std_model)

    # rand() samples from [0, 1); the offset keeps every weight >= 0.1.
    pi.set_weights(np.random.rand(pi.weights_size) + .1)

    for sample in np.random.randn(20, state_dim):
        state = np.atleast_1d(sample)
        action = pi.draw_action(state)
        exact = pi.diff(state, action)
        numeric = numerical_diff_policy(pi, state, action)

        assert np.allclose(exact, numeric)

示例#2

显示文件

def test_multivariate_gaussian():
    """Compare ``pi.diff`` with ``numerical_diff_policy`` on a GaussianPolicy.

    The policy uses a linear regressor (5 inputs, 3 outputs) and a fixed
    3x3 matrix ``sigma``. For each of 20 randomly drawn states an action is
    sampled and the two derivative computations must agree under
    ``np.allclose``.
    """
    # Fixed seed so sigma, the weights, states and actions are reproducible.
    np.random.seed(88)
    state_dim, action_dim = 5, 3

    # A @ A.T is symmetric positive semi-definite by construction.
    factor = np.random.rand(action_dim, action_dim)
    sigma = factor.dot(factor.T)

    mean_model = Regressor(
        LinearApproximator,
        input_shape=(state_dim, ),
        output_shape=(action_dim, ),
    )
    pi = GaussianPolicy(mean_model, sigma)
    pi.set_weights(np.random.rand(pi.weights_size))

    for sample in np.random.randn(20, state_dim):
        state = np.atleast_1d(sample)
        action = pi.draw_action(state)
        exact = pi.diff(state, action)
        numeric = numerical_diff_policy(pi, state, action)

        assert np.allclose(exact, numeric)