def __init__(self): # From https://github.com/markwmuller/controlpy/blob/master/controlpy/synthesis.py#L8 """Solve the continuous time LQR controller for a continuous time system. A and B are system matrices, describing the systems dynamics: dx/dt = A x + B u The controller minimizes the infinite horizon quadratic cost function: cost = integral (x.T*Q*x + u.T*R*u) dt where Q is a positive semidefinite matrix, and R is positive definite matrix. Returns K, X, eigVals: Returns gain the optimal gain K, the solution matrix X, and the closed loop system eigenvalues. The optimal input is then computed as: input: u = -K*x """ # ref Bertsekas, p.151 # Calculate Jacobian around equilibrium # Set point around which the Jacobian should be linearized # It can be here either pole up (all zeros) or pole down s = s0 s[cartpole_state_varname_to_index('position')] = 0.0 s[cartpole_state_varname_to_index('positionD')] = 0.0 s[cartpole_state_varname_to_index('angle')] = 0.0 s[cartpole_state_varname_to_index('angleD')] = 0.0 u = 0.0 jacobian = cartpole_jacobian(s, u) A = jacobian[:, :-1] B = np.reshape(jacobian[:, -1], newshape=(4, 1)) * u_max # Cost matrices for LQR controller self.Q = np.diag([10.0, 1.0, 1.0, 1.0]) # How much to punish x, v, theta, omega self.R = 1.0e1 # How much to punish Q # first, try to solve the ricatti equation # FIXME: Import needs to be different for some reason than in simulator. X = solve_continuous_are(A, B, self.Q, self.R) # compute the LQR gain if np.array(self.R).ndim == 0: Ri = 1.0 / self.R else: Ri = np.linalg.inv(self.R) K = np.dot(Ri, (np.dot(B.T, X))) eigVals = np.linalg.eigvals(A - np.dot(B, K)) self.K = K self.X = X self.eigVals = eigVals
def step(self, s: np.ndarray, target_position: np.ndarray, time=None): state = np.array([[ s[cartpole_state_varname_to_index('position')] - target_position ], [s[cartpole_state_varname_to_index('positionD')]], [s[cartpole_state_varname_to_index('angle')]], [s[cartpole_state_varname_to_index('angleD')]]]) Q = np.asscalar(np.dot(-self.K, state)) # Clip Q if Q > 1.0: Q = 1.0 elif Q < -1.0: Q = -1.0 else: pass return Q
def step(self, s: np.ndarray, target_position: np.ndarray, time=None): error = -np.array([ [s[cartpole_state_varname_to_index('position')] - target_position], [s[cartpole_state_varname_to_index('positionD')]], [s[cartpole_state_varname_to_index('angle')]], [s[cartpole_state_varname_to_index('angleD')]] ]) positionCMD = self.P_position * error[0, :] + self.D_position * error[1] angleCMD = self.P_angle * error[2] + self.D_angle * error[3] N = np.asscalar(angleCMD + positionCMD) Q = N if Q > 1.0: Q = 1.0 elif Q < -1.0: Q = -1.0 else: pass return N
def cartpole_jacobian(s: Union[np.ndarray, SimpleNamespace], u: float): """ Jacobian of cartpole ode with the following structure: # ______________| position | positionD | angle | angleD | u | # position (x) | xx -> J[0,0] xv xt xo xu -> J[0,4] # positionD (v) | vx vv vt vo vu # angle (t) | tx tv tt to tu # angleD (o) | ox -> J[3,0] ov ot oo ou -> J[3,4] :param p: Namespace containing environment variables such track length, cart mass and pole mass :param s: State vector following the globally defined variable order :param u: Force applied on cart in unnormalized range :returns: A 4x5 numpy.ndarray with all partial derivatives """ if isinstance(s, np.ndarray): angle = s[cartpole_state_varname_to_index('angle')] angleD = s[cartpole_state_varname_to_index('angleD')] position = s[cartpole_state_varname_to_index('position')] positionD = s[cartpole_state_varname_to_index('positionD')] elif isinstance(s, SimpleNamespace): angle = s.angle angleD = s.angleD position = s.position positionD = s.positionD J = np.zeros(shape=(4, 5), dtype=np.float32) # Array to keep Jacobian ca = np.cos(angle) sa = np.sin(angle) if CARTPOLE_EQUATIONS == 'Marcin-Sharpneat': # Helper function A = k * (M + m) - m * (ca ** 2) # Jacobian entries J[0, 0] = 0.0 # xx J[0, 1] = 1.0 # xv J[0, 2] = 0.0 # xt J[0, 3] = 0.0 # xo J[0, 4] = 0.0 # xu J[1, 0] = 0.0 # vx J[1, 1] = -k * M_fric / A # vv J[1, 2] = ( # vt -2.0 * k * u * ca * sa * m - 2.0 * ca * sa * m * ( -k * L * (angleD**2) * sa * m + g * ca * sa * m - k * positionD * M_fric + (angleD * ca * J_fric/L) ))/(A**2) \ + ( -k * L * (angleD**2) * ca * m + g * ((ca**2)-(sa**2)) * m - (angleD * sa * J_fric)/L )/ A J[1, 3] = (-2.0 * k * L * angleD * sa * m # vo + (ca * J_fric / L)) / A J[1, 4] = k / A # vu J[2, 0] = 0.0 # tx J[2, 1] = 0.0 # tv J[2, 2] = 0.0 # tt J[2, 3] = 1.0 # to J[2, 4] = 0.0 # tu J[3, 0] = 0.0 # ox J[3, 1] = -ca * M_fric / (L * A) # ov J[3, 2] = ( # ot - 2.0 * u * (ca**2) * sa * m - 2.0 * ca * sa * m * ( -L * (angleD**2) * ca * sa * m + g * sa * (M + m) - positionD * ca * M_fric - (angleD * (M+m) * J_fric)/(L*m)) )/(L*(A**2)) \ + ( -u * sa + L * (angleD**2) * ((sa**2)-(ca**2)) * m + g*ca*(M+m) + positionD * sa * M_fric )/(L*A) J[3, 3] = ( # oo -2.0*L*angleD*ca * sa * m - ((M+m) * J_fric)/(L*m) ) / (L * A) J[3, 4] = ca / (L*A) # ou return J
def cartpole_ode(s: np.ndarray, u: float): return _cartpole_ode( s[..., cartpole_state_varname_to_index('angle')], s[..., cartpole_state_varname_to_index('angleD')], s[..., cartpole_state_varname_to_index('position')], s[..., cartpole_state_varname_to_index('positionD')], u )
Q + controlDisturbance * rng.standard_normal(size=np.shape(Q), dtype=np.float32) + P_GLOBALS.controlBias ) # Q is drive -1:1 range, add noise on control return u if __name__ == '__main__': import timeit """ On 9.02.2021 we saw a perfect coincidence (5 digits after coma) of Jacobian from Mathematica cartpole_model.nb with Jacobian calculated with this script for all non zero inputs, dtype=float32 """ # Set non-zero input s = s0 s[cartpole_state_varname_to_index('position')] = -30.2 s[cartpole_state_varname_to_index('positionD')] = 2.87 s[cartpole_state_varname_to_index('angle')] = -0.32 s[cartpole_state_varname_to_index('angleD')] = 0.237 u = -0.24 # Calculate time necessary to evaluate cartpole ODE: f_to_measure = 'angleDD, positionDD = cartpole_ode(s, u)' number = 1 # Gives the number of times each timeit call executes the function which we want to measure repeat_timeit = 100000 # Gives how many times timeit should be repeated timings = timeit.Timer(f_to_measure, globals=globals()).repeat(repeat_timeit, number) min_time = min(timings)/float(number) max_time = max(timings)/float(number) average_time = np.mean(timings)/float(number)