from lorenz import Lorenz
from mpl_toolkits.mplot3d import Axes3D
import pickle
import matplotlib.pyplot as plt
"""   Initialize lorenz object by loading parameters from the training data file   """
with open("learning_algorithm2_training_data", "rb") as f:
    d = pickle.load(f)
sigma = d['sigma']
b = d['b']
r = d['r']
lrz = Lorenz(sigma, b, r)
lrz.X = d['X']
lrz.U = d['U']
"""  Initialize lorenz object state and compute trajectories with learning based control, lyapunov based control, 
and without any control  """
n = 6000  # number of time steps
lrz.state = [-4, -4, -1]
y_l, u_l, t_l = lrz.trajectory(n, 0)  # learning-based control
lrz.state = [-4, -4, -1]
y_m, u_m, t_m = lrz.trajectory(n, 1)  # Lyapunov-based control
lrz.state = [-4, -4, -1]
y_wc, t_wc = lrz.trajectory_no_control(n)  # no control
"""  trajectory visualization  """
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
ax.plot(y_wc[:, 0],
        y_wc[:, 1],
        y_wc[:, 2],
        'r',
        linewidth=2,
        label="uncontrolled trajectory")
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from lorenz import Lorenz
import pickle

sigma = 10
b = 8 / 3
r = 1.5
lrz = Lorenz(sigma, b, r)  # initialize Lorenz object with the given parameters

n_samples = 1000  # set number of training samples
lrz.X = np.zeros((n_samples, 3))  # sampled states
lrz.U = np.zeros((n_samples, 1))  # learned control signs
"""  Training  
randomly initialize the state of the lorenz object and set lrz.X[i, :] to the initial state
lorenz object takes one step with -ve control and gets reward r1
reset the lorenz state back to starting state and take another step with +ve control which gives reward r2
Set policy lrz.U[i, 0] to -1 or 1 depending upon which policy maximizes reward
"""
for i in range(n_samples):
    lrz.X[i, :] = lrz.reset()  # random initial state
    lrz.step(-lrz.max_control)  # one step with maximum negative control
    r1 = lrz.reward()
    lrz.state = lrz.X[i, :]  # reset back to the sampled initial state
    lrz.step(lrz.max_control)  # one step with maximum positive control
    r2 = lrz.reward()
    lrz.U[i, 0] = 2 * np.argmax([r1, r2]) - 1  # -1 if r1 wins, +1 if r2 wins
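# Optional sanity check (assumption, not part of the original training loop):
# scatter the sampled states coloured by the learned control sign to inspect the
# bang-bang policy stored in (lrz.X, lrz.U).
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(lrz.X[:, 0], lrz.X[:, 1], lrz.X[:, 2],
           c=lrz.U[:, 0], cmap='coolwarm', s=5)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()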

data = {
    'sigma': sigma,