def __init__(self):
        # From https://github.com/markwmuller/controlpy/blob/master/controlpy/synthesis.py#L8
        """Solve the continuous time LQR controller for a continuous time system.

        A and B are system matrices, describing the systems dynamics:
         dx/dt = A x + B u

        The controller minimizes the infinite horizon quadratic cost function:
         cost = integral (x.T*Q*x + u.T*R*u) dt

        where Q is a positive semidefinite matrix, and R is positive definite matrix.

        Returns K, X, eigVals:
        Returns gain the optimal gain K, the solution matrix X, and the closed loop system eigenvalues.
        The optimal input is then computed as:
         input: u = -K*x
        """
        # ref Bertsekas, p.151

        # Calculate Jacobian around equilibrium
        # Set point around which the Jacobian should be linearized
        # It can be here either pole up (all zeros) or pole down
        s = s0
        s[cartpole_state_varname_to_index('position')] = 0.0
        s[cartpole_state_varname_to_index('positionD')] = 0.0
        s[cartpole_state_varname_to_index('angle')] = 0.0
        s[cartpole_state_varname_to_index('angleD')] = 0.0
        u = 0.0

        jacobian = cartpole_jacobian(s, u)

        A = jacobian[:, :-1]
        B = np.reshape(jacobian[:, -1], newshape=(4, 1)) * u_max

        # Cost matrices for LQR controller
        self.Q = np.diag([10.0, 1.0, 1.0,
                          1.0])  # How much to punish x, v, theta, omega
        self.R = 1.0e1  # How much to punish Q

        # first, try to solve the ricatti equation
        # FIXME: Import needs to be different for some reason than in simulator.
        X = solve_continuous_are(A, B, self.Q, self.R)

        # compute the LQR gain
        if np.array(self.R).ndim == 0:
            Ri = 1.0 / self.R
        else:
            Ri = np.linalg.inv(self.R)

        K = np.dot(Ri, (np.dot(B.T, X)))

        eigVals = np.linalg.eigvals(A - np.dot(B, K))

        self.K = K
        self.X = X
        self.eigVals = eigVals
    def step(self, s: np.ndarray, target_position: np.ndarray, time=None):

        state = np.array([[
            s[cartpole_state_varname_to_index('position')] - target_position
        ], [s[cartpole_state_varname_to_index('positionD')]],
                          [s[cartpole_state_varname_to_index('angle')]],
                          [s[cartpole_state_varname_to_index('angleD')]]])

        Q = np.asscalar(np.dot(-self.K, state))

        # Clip Q
        if Q > 1.0:
            Q = 1.0
        elif Q < -1.0:
            Q = -1.0
        else:
            pass

        return Q
    def step(self, s: np.ndarray, target_position: np.ndarray, time=None):
        error = -np.array([
            [s[cartpole_state_varname_to_index('position')] - target_position],
            [s[cartpole_state_varname_to_index('positionD')]],
            [s[cartpole_state_varname_to_index('angle')]],
            [s[cartpole_state_varname_to_index('angleD')]]
        ])

        positionCMD = self.P_position * error[0, :] + self.D_position * error[1]
        angleCMD = self.P_angle * error[2] + self.D_angle * error[3]

        N = np.asscalar(angleCMD + positionCMD)
        Q = N

        if Q > 1.0:
            Q = 1.0
        elif Q < -1.0:
            Q = -1.0
        else:
            pass

        return N
示例#4
0
def cartpole_jacobian(s: Union[np.ndarray, SimpleNamespace], u: float):
    """
    Jacobian of cartpole ode with the following structure:

        # ______________|    position     |   positionD    | angle | angleD |       u       |
        # position  (x) |   xx -> J[0,0]        xv            xt       xo      xu -> J[0,4]
        # positionD (v) |       vx              vv            vt       vo         vu
        # angle     (t) |       tx              tv            tt       to         tu
        # angleD    (o) |   ox -> J[3,0]        ov            ot       oo      ou -> J[3,4]
    
    :param p: Namespace containing environment variables such track length, cart mass and pole mass
    :param s: State vector following the globally defined variable order
    :param u: Force applied on cart in unnormalized range

    :returns: A 4x5 numpy.ndarray with all partial derivatives
    """
    if isinstance(s, np.ndarray):
        angle = s[cartpole_state_varname_to_index('angle')]
        angleD = s[cartpole_state_varname_to_index('angleD')]
        position = s[cartpole_state_varname_to_index('position')]
        positionD = s[cartpole_state_varname_to_index('positionD')]
    elif isinstance(s, SimpleNamespace):
        angle = s.angle
        angleD = s.angleD
        position = s.position
        positionD = s.positionD
    
    J = np.zeros(shape=(4, 5), dtype=np.float32)  # Array to keep Jacobian
    ca = np.cos(angle)
    sa = np.sin(angle)

    if CARTPOLE_EQUATIONS == 'Marcin-Sharpneat':
        # Helper function
        A = k * (M + m) - m * (ca ** 2)

        # Jacobian entries
        J[0, 0] = 0.0  # xx

        J[0, 1] = 1.0  # xv

        J[0, 2] = 0.0  # xt

        J[0, 3] = 0.0  # xo

        J[0, 4] = 0.0  # xu

        J[1, 0] = 0.0  # vx

        J[1, 1] = -k * M_fric / A  # vv

        J[1, 2] = (    # vt
                     -2.0 * k * u * ca * sa * m
                     - 2.0 * ca * sa * m * (
                             -k * L * (angleD**2) * sa * m
                             + g * ca * sa * m - k * positionD * M_fric
                             + (angleD * ca * J_fric/L)
                                                                ))/(A**2) \
             + (
                     -k * L * (angleD**2) * ca * m
                     +  g * ((ca**2)-(sa**2)) * m
                     - (angleD * sa * J_fric)/L
                                                                )/ A

        J[1, 3] = (-2.0 * k * L * angleD * sa * m  # vo
              + (ca * J_fric / L)) / A

        J[1, 4] = k / A  # vu

        J[2, 0] = 0.0  # tx

        J[2, 1] = 0.0  # tv

        J[2, 2] = 0.0  # tt

        J[2, 3] = 1.0  # to

        J[2, 4] = 0.0  # tu

        J[3, 0] = 0.0  # ox

        J[3, 1] = -ca * M_fric / (L * A)  # ov

        J[3, 2] = (  # ot
                    - 2.0 * u * (ca**2) * sa * m
                    - 2.0 * ca * sa * m * (
                            -L * (angleD**2) * ca * sa * m
                            + g * sa * (M + m)
                            - positionD * ca * M_fric
                            - (angleD * (M+m) * J_fric)/(L*m))
                                    )/(L*(A**2)) \
             + (
                     -u * sa
                     + L * (angleD**2) * ((sa**2)-(ca**2)) * m
                     + g*ca*(M+m)
                     + positionD * sa * M_fric
                                    )/(L*A)

        J[3, 3] = (  # oo
                     -2.0*L*angleD*ca * sa * m
                     - ((M+m) * J_fric)/(L*m)
                                    ) / (L * A)

        J[3, 4] = ca / (L*A)  # ou

        return J
示例#5
0
def cartpole_ode(s: np.ndarray, u: float):
    return _cartpole_ode(
        s[..., cartpole_state_varname_to_index('angle')], s[..., cartpole_state_varname_to_index('angleD')],
        s[..., cartpole_state_varname_to_index('position')], s[..., cartpole_state_varname_to_index('positionD')],
        u
    )
示例#6
0
        Q + controlDisturbance *  rng.standard_normal(size=np.shape(Q), dtype=np.float32) + P_GLOBALS.controlBias
    )  # Q is drive -1:1 range, add noise on control

    return u


if __name__ == '__main__':
    import timeit
    """
    On 9.02.2021 we saw a perfect coincidence (5 digits after coma) of Jacobian from Mathematica cartpole_model.nb
    with Jacobian calculated with this script for all non zero inputs, dtype=float32
    """

    # Set non-zero input
    s = s0
    s[cartpole_state_varname_to_index('position')] = -30.2
    s[cartpole_state_varname_to_index('positionD')] = 2.87
    s[cartpole_state_varname_to_index('angle')] = -0.32
    s[cartpole_state_varname_to_index('angleD')] = 0.237
    u = -0.24


    # Calculate time necessary to evaluate cartpole ODE:

    f_to_measure = 'angleDD, positionDD = cartpole_ode(s, u)'
    number = 1  # Gives the number of times each timeit call executes the function which we want to measure
    repeat_timeit = 100000 # Gives how many times timeit should be repeated
    timings = timeit.Timer(f_to_measure, globals=globals()).repeat(repeat_timeit, number)
    min_time = min(timings)/float(number)
    max_time = max(timings)/float(number)
    average_time = np.mean(timings)/float(number)