def test_model_policy_gradient_with_random_seed():
    """Check that a fixed ``random_state`` makes the optimizer reproducible.

    The optimizer is run twice from the same starting point with identical
    settings, including the same ``random_state``; both results must compare
    equal.
    """
    start = np.random.randn(5)

    # One shared settings mapping guarantees both runs get identical arguments.
    settings = dict(
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        max_iterations=50,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
        random_state=65536,
    )

    first_run = model_policy_gradient(sum_of_squares, start, **settings)
    second_run = model_policy_gradient(sum_of_squares, start, **settings)

    np.testing.assert_equal(first_run, second_run)
def test_model_policy_gradient_limited_iterations():
    """Check the result fields when the run is capped by ``max_iterations``.

    With ``max_iterations=15`` the result must report ``nit == 15``, expose
    ``x`` as a NumPy array and ``fun`` as a Python float.
    """
    iteration_cap = 15
    start = np.random.randn(10)

    outcome = model_policy_gradient(
        sum_of_squares,
        start,
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
        known_values=None,
        max_iterations=iteration_cap,
    )

    assert isinstance(outcome.x, np.ndarray)
    assert isinstance(outcome.fun, float)
    assert outcome.nit == iteration_cap
def test_model_policy_gradient():
    """Check that the optimizer drives ``sum_of_squares`` to its minimum.

    Starting from a random 5-dimensional point, the returned ``x`` must be
    within 1e-2 of the origin, ``fun`` within 1e-7 of zero, and ``nfev`` must
    be an ``int``.
    """
    start = np.random.randn(5)
    options = {
        "learning_rate": 1e-1,
        "decay_rate": 0.96,
        "decay_steps": 10,
        "log_sigma_init": -6.0,
        "max_iterations": 100,
        "batch_size": 30,
        "radius_coeff": 3.0,
        "warmup_steps": 10,
        "known_values": None,
    }

    outcome = model_policy_gradient(sum_of_squares, start, **options)

    minimizer = outcome.x
    origin = np.zeros(len(minimizer))
    np.testing.assert_allclose(minimizer, origin, atol=1e-2)
    np.testing.assert_allclose(outcome.fun, 0, atol=1e-7)
    assert isinstance(outcome.nfev, int)
def test_model_policy_gradient_with_known_values():
    """Check that supplying ``known_values`` does not resize the caller's lists.

    A single known point/value pair is passed in; after the optimizer
    returns, both lists must still hold exactly one element.
    """
    start = np.random.randn(5)
    seen_points = [np.ones(5)]
    seen_values = [10.0]

    # The optimizer's return value is irrelevant here; only the state of the
    # input lists afterwards is checked.
    model_policy_gradient(
        sum_of_squares,
        start,
        learning_rate=1e-1,
        decay_rate=0.96,
        decay_steps=10,
        log_sigma_init=-6.0,
        max_iterations=50,
        batch_size=30,
        radius_coeff=3.0,
        warmup_steps=10,
        known_values=(seen_points, seen_values),
    )

    assert len(seen_points) == 1
    assert len(seen_values) == 1