from mpl_toolkits.mplot3d import Axes3D
import pickle

import matplotlib.pyplot as plt

from lorenz import Lorenz  # assumed: module defining the Lorenz class used below

"""
Initialize the lorenz object by loading parameters from the training data file.
"""
with open("learning_algorithm2_training_data", "rb") as f:
    d = pickle.load(f)
sigma = d['sigma']
b = d['b']
r = d['r']
lrz = Lorenz(sigma, b, r)
lrz.X = d['X']
lrz.U = d['U']

"""
Initialize the lorenz object state and compute trajectories with
learning-based control, Lyapunov-based control, and without any control.
"""
n = 6000  # number of time steps
lrz.state = [-4, -4, -1]
y_l, u_l, t_l = lrz.trajectory(n, 0)   # learning-based control
lrz.state = [-4, -4, -1]
y_m, u_m, t_m = lrz.trajectory(n, 1)   # Lyapunov-based control
lrz.state = [-4, -4, -1]
y_wc, t_wc = lrz.trajectory_no_control(n)

"""
Trajectory visualization.
"""
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
ax.plot(y_wc[:, 0], y_wc[:, 1], y_wc[:, 2], 'r', linewidth=2,
        label="uncontrolled trajectory")
# The original fragment breaks off mid-call below; the remaining plot calls
# are completed to follow the pattern of the uncontrolled trajectory above.
ax.plot(y_l[:, 0], y_l[:, 1], y_l[:, 2], 'b', linewidth=2,
        label="learning based control")
ax.plot(y_m[:, 0], y_m[:, 1], y_m[:, 2], 'g', linewidth=2,
        label="lyapunov based control")
ax.legend()
plt.show()
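"""
The script above drives a Lorenz object whose class definition is not shown.
The stand-in below is a minimal sketch, not the original implementation: the
dynamics are the standard Lorenz equations under forward-Euler integration,
the control input u is assumed to enter the y-equation additively, and
reward() is assumed to penalize distance from the origin. Only the attribute
and method names the scripts actually call (state, max_control, X, U, reset,
step, reward) come from the source; every numeric default is an assumption.
"""
import numpy as np


class Lorenz:
    def __init__(self, sigma, b, r, dt=0.01, max_control=50.0):
        self.sigma, self.b, self.r = sigma, b, r
        self.dt = dt                    # integration step size (assumed)
        self.max_control = max_control  # control magnitude limit (assumed)
        self.state = np.zeros(3)
        self.X = None                   # training states, set by the training script
        self.U = None                   # training control signs, set by the training script

    def reset(self):
        # Random initial condition; the sampling range is an assumption.
        self.state = np.random.uniform(-20.0, 20.0, size=3)
        return self.state.copy()

    def step(self, u):
        # One forward-Euler step of the controlled Lorenz equations:
        #   x' = sigma (y - x),  y' = x (r - z) - y + u,  z' = x y - b z
        x, y, z = self.state
        dx = self.sigma * (y - x)
        dy = x * (self.r - z) - y + u   # control channel is an assumption
        dz = x * y - self.b * z
        self.state = self.state + self.dt * np.array([dx, dy, dz])
        return self.state.copy()

    def reward(self):
        # Assumed objective: drive the state to the origin.
        return -np.linalg.norm(self.state)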
import pickle

import numpy as np

from lorenz import Lorenz  # assumed: module defining the Lorenz class used below

# Assumed values: the original fragment uses sigma, b and r without defining
# them; these are the classic chaotic Lorenz parameters.
sigma, b, r = 10.0, 8.0 / 3.0, 28.0

lrz = Lorenz(sigma, b, r)  # initialize lorenz object with the given parameters
n_samples = 1000           # number of training samples
# initialize the training data to zero
lrz.X, lrz.U = np.zeros((n_samples, 3)), np.zeros((n_samples, 1))

"""
Training:
  - randomly initialize the state of the lorenz object and store it in lrz.X[i, :]
  - take one step with negative control and record reward r1
  - reset the state back to lrz.X[i, :] and take one step with positive
    control, recording reward r2
  - set the policy label lrz.U[i, 0] to -1 or +1, whichever control sign
    produced the larger reward
"""
for i in range(n_samples):
    lrz.X[i, :] = lrz.reset()
    lrz.step(-lrz.max_control)
    r1 = lrz.reward()
    lrz.state = lrz.X[i, :]
    lrz.step(lrz.max_control)
    r2 = lrz.reward()
    lrz.U[i, 0] = 2 * np.argmax([r1, r2]) - 1  # maps argmax index 0/1 to -1/+1

data = {
    'sigma': sigma,
    'b': b,
    'r': r,
    'n_samples': n_samples,
    'X': lrz.X,
    'U': lrz.U
}

# write the training data to a file
with open("learning_algorithm2_training_data", "wb") as f:
    pickle.dump(data, f)
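"""
Neither script shows how trajectory(n, flag) turns the recorded (X, U)
samples into a controller. One common choice for bang-bang labels like these
is a 1-nearest-neighbour policy: find the stored state closest to the current
state and apply its control sign at full magnitude. The sketch below is that
guess, built on the hypothetical Lorenz stand-in above; nn_policy and rollout
are illustrative names, not the original API.
"""
import numpy as np


def nn_policy(lrz, state):
    # Apply the control sign of the nearest training state (assumed policy).
    i = np.argmin(np.linalg.norm(lrz.X - np.asarray(state), axis=1))
    return lrz.U[i, 0] * lrz.max_control


def rollout(lrz, n):
    # Roll the system forward n steps under the nearest-neighbour policy,
    # returning visited states, applied controls, and time stamps, in the
    # same (y, u, t) shape the evaluation script expects from trajectory().
    ys, us = np.zeros((n, 3)), np.zeros(n)
    for k in range(n):
        ys[k] = lrz.state
        us[k] = nn_policy(lrz, lrz.state)
        lrz.step(us[k])
    ts = np.arange(n) * lrz.dt
    return ys, us, ts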