Exemplo n.º 1
0
    def test_input_hessian(self):
        """Check the analytic input-cost Hessian against central differences.

        The reference Hessian is obtained by numerically differentiating
        ``CartPoleConfigModule.gradient_cost_fn_input`` with respect to each
        input component, then compared with
        ``CartPoleConfigModule.hessian_cost_fn_input`` via ``pytest.approx``.
        """
        us = np.ones((1, 1))
        xs = np.ones((1, 4))
        # Arguments are passed as (state, input), the same order used for
        # gradient_cost_fn_input below, so both calls agree on which array
        # is which.
        cost_hess = CartPoleConfigModule.hessian_cost_fn_input(xs, us)

        # numeric hessian: central difference of the analytic gradient
        eps = 1e-4
        input_size = us.shape[1]
        expected_hess = np.zeros((1, input_size, input_size))
        for i in range(input_size):
            # gradient with the i-th input nudged up by eps
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                CartPoleConfigModule.gradient_cost_fn_input(
                    xs, tmp_u)
            # gradient with the i-th input nudged down by eps
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                CartPoleConfigModule.gradient_cost_fn_input(
                    xs, tmp_u)

            # column i holds d(gradient) / d(u_i)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)

        assert cost_hess == pytest.approx(expected_hess)
Exemplo n.º 2
0
    def test_terminal_state_hessian(self):
        """Compare the terminal state-cost Hessian with a numeric estimate.

        The estimate is a central difference of
        ``CartPoleConfigModule.gradient_cost_fn_state`` (called with
        ``terminal=True``) taken along each of the four state components.
        """
        xs = np.ones(4)

        def terminal_grad(index, delta):
            # Terminal-cost gradient with state component `index` moved
            # by `delta`; the shared `xs` array is left untouched.
            shifted = xs.copy()
            shifted[index] = xs[index] + delta
            return CartPoleConfigModule.gradient_cost_fn_state(
                shifted, None, terminal=True)

        cost_hess = CartPoleConfigModule.hessian_cost_fn_state(
            xs, None, terminal=True)

        # numeric hessian, one column per perturbed state component
        eps = 1e-4
        expected_hess = np.zeros((1, 4, 4))
        for col in range(4):
            plus = terminal_grad(col, eps)
            minus = terminal_grad(col, -eps)
            expected_hess[0, :, col] = (plus - minus) / (2. * eps)

        assert cost_hess == pytest.approx(expected_hess)
Exemplo n.º 3
0
    def test_gradient_input(self):
        """Check ``calc_f_u`` against a central-difference estimate.

        The estimate perturbs each input component by ``eps`` in both
        directions and differences the results of ``predict_next_state``.
        """
        config = CartPoleConfigModule()
        cartpole_model = CartPoleModel(config)

        # random state in [0, 1) with the last component pinned to pi / 6
        xs = np.random.rand(1, config.STATE_SIZE)
        xs[0, -1] = np.pi / 6.
        us = np.ones((1, config.INPUT_SIZE))

        grad = cartpole_model.calc_f_u(xs, us, config.DT)

        def next_state(index, delta):
            # Next state predicted with input component `index` moved
            # by `delta`.
            shifted = us.copy()
            shifted[0, index] = us[0, index] + delta
            return cartpole_model.predict_next_state(xs[0], shifted[0])

        # expected gradient (numeric)
        expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
        eps = 1e-4

        for col in range(config.INPUT_SIZE):
            plus = next_state(col, eps)
            minus = next_state(col, -eps)
            expected_grad[0, :, col] = (plus - minus) / (2. * eps)

        assert grad == pytest.approx(expected_grad)
Exemplo n.º 4
0
    def test_step(self):
        """Check a one-input ``predict_traj`` rollout against a hand-written step.

        The expected trajectory is the initial state followed by
        ``curr_x + derivative * config.DT``, where the derivative is
        written out below from the config constants MP, MC, L and G.
        """
        config = CartPoleConfigModule()
        cartpole_model = CartPoleModel(config)

        theta = np.pi / 6.
        curr_x = np.ones(4)
        curr_x[2] = theta

        us = np.ones((1, 1))

        next_x = cartpole_model.predict_traj(curr_x, us)

        sin_t = np.sin(theta)
        cos_t = np.cos(theta)
        # term shared by both acceleration denominators
        denom = config.MC + config.MP * sin_t**2

        # NOTE(review): the literal 1. / 1.**2 terms presumably stand for
        # the unit input and the unit value of curr_x[3] -- confirm against
        # the model's equations.
        d_x1 = (1. + config.MP * sin_t
                * (config.L * (1.**2) + config.G * cos_t)) / denom
        d_x3 = (-1. * cos_t
                - config.MP * config.L * (1.**2) * cos_t * sin_t
                - (config.MP + config.MC) * config.G * sin_t) \
            / (config.L * denom)

        derivative = np.array([curr_x[1], d_x1, curr_x[3], d_x3])
        stepped = derivative * config.DT + curr_x

        # trajectory = [initial state, state after one step]
        expected = np.stack((curr_x, stepped), axis=0)

        assert next_x == pytest.approx(expected, abs=1e-5)
Exemplo n.º 5
0
    def test_input_gradient(self):
        """Compare the input-cost gradient with a central-difference estimate.

        The estimate differences ``CartPoleConfigModule.input_cost_fn``
        evaluated at inputs perturbed by ``eps`` in both directions.
        """
        us = np.ones((1, 1))
        cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us)

        def cost_at(index, delta):
            # Input cost with component `index` moved by `delta`.
            shifted = us.copy()
            shifted[0, index] = us[0, index] + delta
            return CartPoleConfigModule.input_cost_fn(shifted)

        # numeric gradient
        eps = 1e-4
        expected_grad = np.zeros((1, 1))
        for col in range(1):
            plus = cost_at(col, eps)
            minus = cost_at(col, -eps)
            expected_grad[0, col] = (plus - minus) / (2. * eps)

        assert cost_grad == pytest.approx(expected_grad)
Exemplo n.º 6
0
    def test_state_gradient(self):
        """Compare the state-cost gradient with a central-difference estimate.

        The estimate differences ``CartPoleConfigModule.state_cost_fn``
        evaluated at states perturbed by ``eps`` in both directions, one
        state component at a time.
        """
        xs = np.ones((1, 4))
        cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None)

        def cost_at(index, delta):
            # State cost with component `index` moved by `delta`.
            shifted = xs.copy()
            shifted[0, index] = xs[0, index] + delta
            return CartPoleConfigModule.state_cost_fn(shifted, None)

        # numeric gradient
        eps = 1e-4
        expected_grad = np.zeros((1, 4))
        for col in range(4):
            plus = cost_at(col, eps)
            minus = cost_at(col, -eps)
            expected_grad[0, col] = (plus - minus) / (2. * eps)

        assert cost_grad == pytest.approx(expected_grad)
Exemplo n.º 7
0
    def test_predict_traj(self):
        """Check that ``predict_traj`` agrees for 2-D and stacked 3-D inputs.

        The same input sequence is passed once as-is and once tiled twice
        along a new leading axis; the first entry of the second result
        must match the first result.
        """
        config = CartPoleConfigModule()
        cartpole_model = CartPoleModel(config)

        init_state = np.ones(config.STATE_SIZE)
        init_state[-1] = np.pi / 6.
        single_inputs = np.ones((1, config.INPUT_SIZE))

        single_traj = cartpole_model.predict_traj(init_state, single_inputs)

        # two identical copies of the inputs, shape (2, 1, INPUT_SIZE)
        stacked_inputs = np.tile(single_inputs, (2, 1, 1))
        stacked_traj = cartpole_model.predict_traj(init_state,
                                                   stacked_inputs)
        first_stacked_traj = stacked_traj[0]

        assert first_stacked_traj == pytest.approx(single_traj)
Exemplo n.º 8
0
    def test_calc_costs(self):
        """Check the output shapes of the config's three cost functions.

        Covers ``input_cost_fn``, ``state_cost_fn`` and
        ``terminal_state_cost_fn`` on constant-valued batches.
        """
        config = CartPoleConfigModule()

        # problem dimensions used to build the dummy batches
        pop_size, pred_len = 2, 5
        state_size, input_size = 4, 1
        traj_shape = (pop_size, pred_len, state_size)

        pred_xs = np.ones(traj_shape)
        g_xs = np.full(traj_shape, 0.5)
        input_samples = np.full((pop_size, pred_len, input_size), 0.5)

        input_costs = config.input_cost_fn(input_samples)
        assert input_costs.shape == (pop_size, pred_len, input_size)

        state_costs = config.state_cost_fn(pred_xs, g_xs)
        assert state_costs.shape == (pop_size, pred_len, 1)

        # the terminal cost only sees the last step of each trajectory
        last_pred = pred_xs[:, -1, :]
        last_goal = g_xs[:, -1, :]
        terminal_costs = config.terminal_state_cost_fn(last_pred, last_goal)
        assert terminal_costs.shape == (pop_size, 1)
Exemplo n.º 9
0
 def __init__(self):
     """Build the cart-pole environment, config, planner, model and controller.

     The single config instance is shared by the planner, the model and
     the iLQR controller; the controller also receives the model.
     """
     self.env = CartPoleEnv()

     shared_config = CartPoleConfigModule()
     self.config = shared_config
     self.planner = ConstantPlanner(shared_config)

     dynamics = CartPoleModel(shared_config)
     self.model = dynamics
     self.controller = iLQR(shared_config, dynamics)