Python model_policy_gradient示例

编程语言: Python

命名空间/包名称: recirq.optimize.mpg

方法/功能: model_policy_gradient

hotexamples.com的示例: 4

Python model_policy_gradient - 已找到4个示例。这些是从开源项目中提取的、最受好评的recirq.optimize.mpg.model_policy_gradient真实Python示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

def test_model_policy_gradient_with_random_seed():
    """Check that two runs with the same seed and start point compare equal."""
    start = np.random.randn(5)
    # Both runs receive exactly the same settings, including the fixed seed.
    settings = dict(
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        max_iterations=50,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
        random_state=65536,
    )

    first_run = model_policy_gradient(sum_of_squares, start, **settings)
    second_run = model_policy_gradient(sum_of_squares, start, **settings)

    np.testing.assert_equal(first_run, second_run)

示例#2

显示文件

def test_model_policy_gradient_limited_iterations():
    """Run with a 15-iteration cap and check result types and iteration count."""
    iteration_cap = 15
    start = np.random.randn(10)
    settings = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        "known_values": None,
        "max_iterations": iteration_cap,
    }

    outcome = model_policy_gradient(sum_of_squares, start, **settings)

    assert isinstance(outcome.x, np.ndarray)
    assert isinstance(outcome.fun, float)
    # The reported iteration count must match the cap that was requested.
    assert outcome.nit == iteration_cap

示例#3

显示文件

def test_model_policy_gradient():
    """Optimize sum_of_squares from a random start and check the result is near zero."""
    start = np.random.randn(5)
    settings = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "max_iterations": 100,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        "known_values": None,
    }

    outcome = model_policy_gradient(sum_of_squares, start, **settings)

    # The returned point is compared against the all-zeros vector of the same length.
    expected_point = np.zeros(len(outcome.x))
    np.testing.assert_allclose(outcome.x, expected_point, atol=1e-2)
    np.testing.assert_allclose(outcome.fun, 0, atol=1e-7)
    assert isinstance(outcome.nfev, int)

示例#4

显示文件

def test_model_policy_gradient_with_known_values():
    """Pass known_values and check the caller's lists still hold one entry each."""
    start = np.random.randn(5)
    seen_points = [np.ones(5)]
    seen_values = [10.0]
    settings = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "max_iterations": 50,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        # The very same list objects are handed over so their length can be
        # re-checked after the call.
        "known_values": (seen_points, seen_values),
    }

    # The return value is not inspected; only the input lists are checked.
    model_policy_gradient(sum_of_squares, start, **settings)

    assert len(seen_points) == 1
    assert len(seen_values) == 1