Пример #1
0
 def testIlqrWithLqrProblemSpecifiedGenerally(self):
     """Check iLQR on the problem built by ``one_step_control``.

     Asserts that the returned state trajectory starts at ``x0``, that the
     first control equals ``-x0``, and that every later state and control
     is zero to within ``atol=1e-15``.
     """
     dim, T, num_iters = 2, 10, 3
     p = one_step_control(dim, T)
     x0 = self.rng.normal(size=dim)
     # The solver is started from an all-zero control sequence.
     X, U = control.ilqr(num_iters, p, x0, jnp.zeros((T, dim)))
     np.testing.assert_allclose(X[0], x0)
     np.testing.assert_allclose(U[0], -x0)
     # Expected shapes derive from `dim` so they track the problem size
     # configured at the top of the test.
     np.testing.assert_allclose(X[1:], jnp.zeros((T, dim)), atol=1E-15)
     np.testing.assert_allclose(U[1:], jnp.zeros((T - 1, dim)), atol=1E-15)
Пример #2
0
 def testIlqrWithLqrProblemSpecifiedGenerally(self):
   """Check iLQR on the problem built by ``one_step_control``.

   Asserts that the returned state trajectory starts at ``x0``, that the
   first control equals ``-x0``, and that every later state and control is
   zero. All comparisons also check dtypes.
   """
   # Fixed seed so the initial state is reproducible across runs.
   randn = np.random.RandomState(0).randn
   dim, T, num_iters = 2, 10, 3
   p = one_step_control(dim, T)
   x0 = randn(dim)
   # The solver is started from an all-zero control sequence.
   X, U = control.ilqr(num_iters, p, x0, jnp.zeros((T, dim)))
   self.assertAllClose(X[0], x0, check_dtypes=True)
   self.assertAllClose(U[0], -x0, check_dtypes=True)
   # Expected shapes derive from `dim` so they track the problem size
   # configured at the top of the test.
   self.assertAllClose(X[1:], jnp.zeros((T, dim)), check_dtypes=True)
   self.assertAllClose(U[1:], jnp.zeros((T - 1, dim)), check_dtypes=True)
Пример #3
0
 def testIlqrWithLqrProblem(self):
     """Check iLQR on a problem converted from ``one_step_lqr``.

     The LQR specification is turned into a control problem with
     ``control_from_lqr`` before being handed to ``control.ilqr``. Asserts
     that the state trajectory starts at ``x0``, that the first control
     equals ``-x0``, and that every later state and control is zero to
     within ``atol=1e-15``.
     """
     dim, T, num_iters = 2, 10, 3
     lqr = one_step_lqr(dim, T)
     p = control_from_lqr(lqr)
     x0 = self.rng.normal(size=dim)
     # The solver is started from an all-zero control sequence.
     X, U = control.ilqr(num_iters, p, x0, jnp.zeros((T, dim)))
     np.testing.assert_allclose(X[0], x0)
     np.testing.assert_allclose(U[0], -x0)
     # Expected shapes derive from `dim` so they track the problem size
     # configured at the top of the test.
     np.testing.assert_allclose(X[1:], jnp.zeros((T, dim)), atol=1E-15)
     np.testing.assert_allclose(U[1:], jnp.zeros((T - 1, dim)), atol=1E-15)
Пример #4
0
 def testIlqrWithLqrProblem(self):
   """Check iLQR on a problem converted from ``one_step_lqr``.

   The LQR specification is turned into a control problem with
   ``control_from_lqr`` before being handed to ``control.ilqr``. Asserts
   that the state trajectory starts at ``x0``, that the first control
   equals ``-x0``, and that every later state and control is zero. All
   comparisons also check dtypes.
   """
   # Fixed seed so the initial state is reproducible across runs.
   randn = np.random.RandomState(0).randn
   dim, T, num_iters = 2, 10, 3
   lqr = one_step_lqr(dim, T)
   p = control_from_lqr(lqr)
   x0 = randn(dim)
   # The solver is started from an all-zero control sequence.
   X, U = control.ilqr(num_iters, p, x0, jnp.zeros((T, dim)))
   self.assertAllClose(X[0], x0, check_dtypes=True)
   self.assertAllClose(U[0], -x0, check_dtypes=True)
   # Expected shapes derive from `dim` so they track the problem size
   # configured at the top of the test.
   self.assertAllClose(X[1:], jnp.zeros((T, dim)), check_dtypes=True)
   self.assertAllClose(U[1:], jnp.zeros((T - 1, dim)), check_dtypes=True)
Пример #5
0
    def testIlqrWithNonlinearProblem(self):
        """Run iLQR on a small inline nonlinear problem and check the result.

        The cost and dynamics callables are defined locally; both divide
        their value by ``t + 1``. The solver starts from a constant control
        sequence of ``1e-5`` and all assertions use ``atol=1e-2``.
        """
        horizon, n_iterations, size = 10, 7, 1

        def stage_cost(t, x, u):
            # Squared first state entry plus a lightly weighted squared
            # first control entry, divided by (t + 1).
            state_term = x[0]**2.
            control_term = 1e-3 * u[0]**2.
            return (state_term + control_term) / (t + 1.)

        def transition(t, x, u):
            # Passed to ControlSpec as the dynamics: elementwise difference
            # of squares, divided by (t + 1).
            return (x**2. - u**2.) / (t + 1.)

        def check(actual, desired):
            # Every comparison in this test shares the same loose tolerance.
            np.testing.assert_allclose(actual, desired, atol=1e-2)

        spec = control.ControlSpec(stage_cost, transition, horizon, size, size)
        start = jnp.array([0.2])
        initial_controls = 1e-5 * jnp.ones((horizon, size))
        states, controls = control.ilqr(
            n_iterations, spec, start, initial_controls)

        check(states[0], start)
        # Only the magnitude of the first control is pinned, via its square.
        check(controls[0]**2., start**2.)
        check(states[1:], jnp.zeros((horizon, size)))
        check(controls[1:], jnp.zeros((horizon - 1, size)))