Python model_policy_gradient примеры использования

Язык программирования: Python

Пространство имен/Пакет: recirq.optimize.mpg

Метод/Функция: model_policy_gradient

Примеров на hotexamples.com: 4

Python model_policy_gradient — найдено 4 примера. Это лучшие примеры кода на Python для recirq.optimize.mpg.model_policy_gradient, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def test_model_policy_gradient_with_random_seed():
    """Identical seeds and start points must give identical results."""
    start = np.random.randn(5)
    # One shared settings dict guarantees both runs see the same arguments,
    # including the fixed ``random_state``.
    settings = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "max_iterations": 50,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        "random_state": 65536,
    }
    first_run, second_run = [
        model_policy_gradient(sum_of_squares, start, **settings)
        for _ in range(2)
    ]

    np.testing.assert_equal(first_run, second_run)

Пример #2

Показать файл

def test_model_policy_gradient_limited_iterations():
    """Cap the run at 15 iterations and check the reported result fields."""
    iteration_cap = 15
    options = dict(
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
        known_values=None,
        max_iterations=iteration_cap,
    )
    outcome = model_policy_gradient(
        sum_of_squares, np.random.randn(10), **options
    )

    assert isinstance(outcome.x, np.ndarray)
    assert isinstance(outcome.fun, float)
    # The reported iteration count must match the cap that was passed in.
    assert outcome.nit == iteration_cap

Пример #3

Показать файл

def test_model_policy_gradient():
    """Run 100 iterations on ``sum_of_squares`` and check the result is near 0.

    Asserts that every coordinate of ``result.x`` is within 1e-2 of zero,
    that ``result.fun`` is within 1e-7 of zero, and that ``result.nfev`` is
    an ``int``.
    """
    # NOTE(review): no seed is set in this test and no ``random_state`` is
    # passed, so presumably each run uses a fresh random draw -- confirm if
    # this test is ever observed to be flaky.
    hyperparameters = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "max_iterations": 100,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        "known_values": None,
    }
    initial_point = np.random.randn(5)
    result = model_policy_gradient(
        sum_of_squares, initial_point, **hyperparameters
    )

    expected_minimizer = np.zeros(len(result.x))
    np.testing.assert_allclose(result.x, expected_minimizer, atol=1e-2)
    np.testing.assert_allclose(result.fun, 0, atol=1e-7)
    assert isinstance(result.nfev, int)

Пример #4

Показать файл

def test_model_policy_gradient_with_known_values():
    """Passing ``known_values`` must leave the caller's lists at length 1."""
    seen_points = [np.ones(5)]
    seen_values = [10.0]
    start = np.random.randn(5)

    # Only the effect on the input lists matters here; the return value is
    # intentionally discarded.
    model_policy_gradient(
        sum_of_squares,
        start,
        known_values=(seen_points, seen_values),
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        max_iterations=50,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
    )

    assert len(seen_points) == 1
    assert len(seen_values) == 1