def simulate_with_actions(state, N, action_schedule):
    """Roll the quadrotor out open loop for N steps under a precomputed action schedule."""
    quad_dev = Quad(state)
    trajectory = [state.copy()]
    action_schedule[:, 2] += M * G  # add gravity compensation to the thrust channel (in place)
    for i in range(N - 1):
        action = action_schedule[i]
        state, _ = quad_dev.step(state, action)
        trajectory.append(state.copy())
    return np.array(trajectory)
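# A minimal usage sketch (illustrative, not part of the original script): rolls the
# quadrotor out open loop under a constant attitude schedule. It assumes the action
# layout [roll, pitch, thrust] used by the MPC branch below, so column 2 is the
# thrust channel that simulate_with_actions offsets by M * G in place.
def example_hover_rollout(init_state, N=50):
    schedule = np.zeros((N, 3))  # zero roll/pitch and no extra thrust -> pure gravity compensation
    trajectory = simulate_with_actions(init_state, N, schedule)
    return trajectory  # shape (N, state_dim): ideally the quad holds its position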
def main():
    simulation_time = 20

    ## prepare the trajectory (course) to track
    idx = np.arange(0, 4 * simulation_time, dl)
    course_pos = curve_pos(idx)
    course_vel = curve_vel(idx)
    speed = 0.5  # m/s
    # rescale every velocity to the same (randomly perturbed) speed; directions are unchanged
    course_vel = course_vel / np.linalg.norm(course_vel, axis=0) * (speed + np.random.rand())
    course_vel[:, 0] = 0  # avoid nan
    course_vel[:, -1] = 0  # stop
    course = np.concatenate((course_pos, course_vel), axis=0)  # desired state with dim 6, shape [dim, timeslot]

    # initial state and target state
    init_state = course[:, 0] + np.array([1., 2., 0., 0., 0., 0.])  # np.array([-1., 1., 0., 0., 0., 0.])
    state = init_state
    nearest, _ = calc_nearest_index(state, course, 0)
    target_states = course[:, nearest]

    # quadrotor object
    quad = Quad(init_state)

    # dimensions
    nu = 4
    nx = init_state.shape[0]  # 6

    # controller parameter settings shared by LQR and linear MPC
    u = np.zeros(nu)
    horizon = 20  # prediction and control horizon in MPC
    Q = np.array([20., 20., 20., 8., 8., 8.])
    QN = Q
    # Q = np.array([8., 8., 8., 0.8, 0.8, 0.8])
    # QN = np.array([10., 10., 10., 1.0, 1.0, 1.0])
    R = np.array([6., 6., 6.])
    Ad, Bd = linearized_model(state)  # a linear time-invariant model linearized about the equilibrium point

    # record states
    labels = ['x', 'y', 'z', 'vx', 'vy', 'vz']  # convenient for plotting
    state_control = {x: [] for x in labels}
    target_chosen = {x: [] for x in labels}
    for ii in range(6):
        target_chosen[labels[ii]].append(target_states[ii])
    err = []  # tracking error

    # visualization
    fig = plt.figure()
    ax = plt.axes(projection='3d')

    # control loop
    timestamp = 0.0
    while timestamp <= simulation_time:
        print("timestamp", timestamp)
        if TRACKING:
            target_states, nearest, chosen_idxs = calc_ref_trajectory(state, course, nearest, horizon)
        else:
            target_states = np.array([0, 0, 10, 0, 0, 0])

        if CONTROLLER == 'MPC':
            mpc_policy = mpc(state, target_states, Ad, Bd, horizon, Q, QN, R, TRACKING)
            nominal_action, actions, nominal_state = mpc_policy.solve()
            action = nominal_action  # - np.dot(K, (target_states - nominal_state))
            roll_r, pitch_r, thrust_r = action
            u[0] = -pitch_r
            u[1] = -roll_r
            u[2] = 0.0
            u[3] = thrust_r + M * G
        elif CONTROLLER == 'LQR':
            lqr_policy = FiniteHorizonLQR(Ad, Bd, Q, R, QN, horizon=horizon)  # instantiate an LQR controller
            lqr_policy.set_target_state(target_states)  # set the target state
            lqr_policy.sat_val = np.array([PI / 8., PI / 8., 0.75])
            K, ustar = lqr_policy(state)
            u[0] = -ustar[1]
            u[1] = -ustar[0]
            u[2] = 0.0
            u[3] = ustar[2] + M * G
            print("K", K)
        elif CONTROLLER == 'PID':
            # compute control errors
            x, y, z, dx, dy, dz = state
            x_r, y_r, z_r, dx_r, dy_r, dz_r = target_states
            ex = x - x_r
            ey = y - y_r
            ez = z - z_r
            dex = dx - dx_r
            dey = dy - dy_r
            dez = dz - dz_r
            # PD gains from second-order pole placement: damping ratio xi, natural frequency wn
            xi = 1.2
            wn = 3.0
            Kp = -wn * wn
            Kd = -2 * wn * xi
            Kxp = 1.2 * Kp
            Kxd = 1.2 * Kd
            Kyp = Kp
            Kyd = Kd
            Kzp = 0.8 * Kp
            Kzd = 0.8 * Kd
            pitch_r = Kxp * ex + Kxd * dex
            roll_r = Kyp * ey + Kyd * dey
            thrust_r = (Kzp * ez + Kzd * dez + 9.8) * 0.44  # gravity feedforward, scaled to a thrust command
            u[0] = pitch_r
            u[1] = roll_r
            u[2] = 0.
            u[3] = thrust_r + M * G

        next_state = quad.step(state, u)
        state = next_state
        timestamp = timestamp + dt

        if TRACKING:
            err.append(state[:3] - target_states[:3, -1])
        else:
            err.append(state[:3] - target_states[:3])

        # record
        for ii in range(len(labels)):
            state_control[labels[ii]].append(state[ii])
            if TRACKING:
                target_chosen[labels[ii]].append(target_states[ii, -1])
            else:
                target_chosen[labels[ii]].append(target_states[ii])

        ## plot
        if True:  # timestamp > dt:
            # plot in real time
            plt.cla()
            x = np.append(init_state[0], state_control['x'])
            y = np.append(init_state[1], state_control['y'])
            z = np.append(init_state[2], state_control['z'])
            final_idx = len(x) - 1
            ax.plot3D(x, y, z, label='Quadcopter flight trajectory')
            ax.plot3D([init_state[0]], [init_state[1]], [init_state[2]],
                      label='Initial position', color='blue', marker='x')
            ax.plot3D([x[final_idx]], [y[final_idx]], [z[final_idx]],
                      label='Final position', color='green', marker='o')
            if TRACKING:
                plt_len = chosen_idxs[-1] + 100
                ax.plot3D(course[0, :plt_len], course[1, :plt_len], course[2, :plt_len])
                # ax.scatter3D([course[0, chosen_idxs[0]]], [course[1, chosen_idxs[0]]], [course[2, chosen_idxs[0]]],
                #              color='red', label='chosen target flight trajectory', marker='o')
                for i in range(horizon):
                    ax.scatter3D([course[0, chosen_idxs[i]]], [course[1, chosen_idxs[i]]],
                                 [course[2, chosen_idxs[i]]], color='red', marker='o')
            else:
                ax.scatter3D([target_states[0]], [target_states[1]], [target_states[2]], color='red', marker='o')
            plt.legend()
            plt.pause(0.01)

    err = np.stack(err)
    plt.figure()
    plt.plot(err[:, 0], label='x')
    plt.plot(err[:, 1], label='y')
    plt.plot(err[:, 2], label='z')
    plt.legend()
    plt.show()
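# Where the PID gains above come from: per axis, the small-angle model is roughly a
# double integrator e_ddot = u, and the law u = Kp*e + Kd*e_dot gives the closed loop
# e_ddot - Kd*e_dot - Kp*e = 0. Matching the standard form
# e_ddot + 2*xi*wn*e_dot + wn**2*e = 0 yields Kp = -wn**2 and Kd = -2*xi*wn.
# The helper below is an illustrative check (not part of the original script): it
# forward-Euler integrates the error dynamics and should return a value near zero.
def _check_pd_error_dynamics(xi=1.2, wn=3.0, e0=1.0, de0=0.0, dt=0.001, steps=5000):
    Kp, Kd = -wn * wn, -2.0 * xi * wn  # same pole-placement gains as the PID branch
    e, de = e0, de0
    for _ in range(steps):
        dde = Kp * e + Kd * de         # e_ddot = u under the double-integrator model
        e, de = e + dt * de, de + dt * dde
    return e                           # ~0 once the error has decayed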
def main():
    quad = Quad()  # instantiate a quadcopter; its timing parameters are used
    # to construct the Koopman operator and the adjoint differential equation
    koopman_operator = KoopmanOperator(quad.time_step)
    adjoint = Adjoint(quad.time_step)

    task = Task()  # this creates the task
    simulation_time = 1000
    horizon = 20  # time horizon
    sat_val = 6.0  # saturation value
    control_reg = np.diag([1.] * 4)  # control regularization
    inv_control_reg = np.linalg.inv(control_reg)  # precompute this
    default_action = lambda x: np.random.uniform(-0.1, 0.1, size=(4,))  # fallback in case LQR control returns NaN

    # initialize the state
    # _R = np.diag([1.0, 1.0, 1.0])
    _R = euler2mat(np.random.uniform(-1., 1., size=(3,)))
    _p = np.array([0., 0., 0.])
    _g = RpToTrans(_R, _p).ravel()
    _twist = np.random.uniform(-1., 1., size=(6,))  # * 2.0
    state = np.r_[_g, _twist]

    target_orientation = np.array([0., 0., -9.81])
    err = np.zeros(simulation_time)

    for t in range(simulation_time):
        # measure the state and transform it through the Koopman observables
        m_state = get_measurement(state)
        t_state = koopman_operator.transform_state(m_state)
        err[t] = np.linalg.norm(m_state[:3] - target_orientation) + np.linalg.norm(m_state[3:])

        Kx, Ku = koopman_operator.get_linearization()  # grab the linear matrices
        lqr_policy = FiniteHorizonLQR(Kx, Ku, task.Q, task.R, task.Qf, horizon=horizon)  # instantiate an LQR controller
        lqr_policy.set_target_state(task.target_expanded_state)  # set target state in the Koopman observable space
        lqr_policy.sat_val = sat_val  # set the saturation value

        # forward-simulate the Koopman dynamical system (here fdx, fdu are just Kx, Ku in a list)
        trajectory, fdx, fdu, action_schedule = koopman_operator.simulate(t_state, horizon, policy=lqr_policy)
        ldx, ldu = task.get_linearization_from_trajectory(trajectory, action_schedule)
        mudx = lqr_policy.get_linearization_from_trajectory(trajectory)

        rhof = task.mdx(trajectory[-1])  # terminal condition for the adjoint
        rho = adjoint.simulate_adjoint(rhof, ldx, ldu, fdx, fdu, mudx, horizon)

        ustar = -np.dot(inv_control_reg, fdu[0].T.dot(rho[0])) + lqr_policy(t_state)
        ustar = np.clip(ustar, -sat_val, sat_val)  # saturate the control
        if np.isnan(ustar).any():
            ustar = default_action(None)

        # advance the quad subject to ustar
        next_state = quad.step(state, ustar)

        # update the Koopman operator from data
        koopman_operator.compute_operator_from_data(get_measurement(state), ustar,
                                                    get_measurement(next_state))

        state = next_state  # backend: update the simulator state

        # we can also use a decaying weight on the information gain
        task.inf_weight = 100.0 * (0.99 ** t)

        if t % 100 == 0:
            print('time : {}, pose : {}, {}'.format(t * quad.time_step, get_measurement(state), ustar))

    t = [i * quad.time_step for i in range(simulation_time)]
    plt.plot(t, err)
    plt.xlabel('t')
    plt.ylabel('tracking error')
    plt.show()
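# Hedged sketch of what an update such as compute_operator_from_data typically does
# (EDMD-style least squares; the actual KoopmanOperator implementation may differ).
# Given stacked observables Z (current), Zp (next), and controls U, it fits the
# discrete linear model zp ~= A z + B u by solving min ||[Z U] X - Zp||_F.
def fit_koopman_lstsq(Z, U, Zp):
    """Z: [n, nz] observables, U: [n, nu] controls, Zp: [n, nz] next observables."""
    ZU = np.hstack([Z, U])                        # regressors [z, u]
    X, *_ = np.linalg.lstsq(ZU, Zp, rcond=None)   # solves ZU @ X ~= Zp
    nz = Z.shape[1]
    A, B = X[:nz].T, X[nz:].T                     # so that zp ~= A @ z + B @ u
    return A, B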