# Finite-difference check of the tangent linear model: perturb the initial
# condition by delta along the tangent direction, integrate the perturbed and
# unperturbed trajectories together with the tangent, and compare the final
# difference against the tangent prediction for a sequence of shrinking deltas.
from numpy import asarray
from lorenz import Lorenz
import pylab

# same setup as in the tangent/adjoint script below
l = Lorenz(10.0, 28.0, 8.0/3.0)
DT, NSTEP = 0.01, 1000

delta = 0.1
deltas = []
grads = []
for i in range(5):
    dtraj = []
    traj0 = []
    traj1 = []
    # tangent direction and baseline initial condition
    dx, dy, dz = 0.0, 10.0, 10.0
    x0, y0, z0 = 0.0, 10.0, 10.0
    # perturbed initial condition
    x1, y1, z1 = x0 + delta * dx, y0 + delta * dy, z0 + delta * dz
    dtraj.append((dx, dy, dz))
    traj0.append((x0, y0, z0))
    traj1.append((x1, y1, z1))
    for j in range(NSTEP):
        dx, dy, dz = l.diff(x0, y0, z0, dx, dy, dz, DT)
        x0, y0, z0 = l.step(x0, y0, z0, DT)
        x1, y1, z1 = l.step(x1, y1, z1, DT)
        dtraj.append((dx, dy, dz))
        traj0.append((x0, y0, z0))
        traj1.append((x1, y1, z1))
    # ratio of the finite-difference change to the tangent prediction at the final step
    grad = (asarray(traj1[-1]) - asarray(traj0[-1])) / asarray(dtraj[-1])
    deltas.append(delta)
    grads.append(grad)
    delta *= 0.1

deltas, grads = asarray(deltas), asarray(grads)
pylab.loglog(deltas, grads[:, 0], '+-')
pylab.loglog(deltas, grads[:, 1], '+-')
pylab.loglog(deltas, grads[:, 2], '+-')
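# What to look for in the plot (a sketch, not part of the original script): to
# first order, the final-step difference traj1[-1] - traj0[-1] equals delta
# times the tangent dtraj[-1], so each curve above should follow a straight
# line of slope one in the log-log plot while delta is small enough to stay in
# the linear regime (and large enough that roundoff does not dominate).
pylab.show()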
# Tangent and adjoint sweep: propagate a tangent perturbation forward along
# the trajectory, then propagate an adjoint variable backward, and check
# consistency -- the inner product of the adjoint and tangent vectors should
# stay constant along the trajectory, so max - min of the aggregate should be
# close to zero.
from numpy import asarray
from lorenz import Lorenz

l = Lorenz(10.0, 28.0, 8.0/3.0)

dtraj, traj = [], []
DT, NSTEP = 0.01, 1000
dx, dy, dz = 0.0, 10.0, 10.0
x, y, z = 0.0, 10.0, 10.0
dtraj.append((dx, dy, dz))
traj.append((x, y, z))
# forward sweep: advance the state and the tangent perturbation
for i in range(NSTEP):
    dx, dy, dz = l.diff(x, y, z, dx, dy, dz, DT)
    x, y, z = l.step(x, y, z, DT)
    dtraj.append((dx, dy, dz))
    traj.append((x, y, z))

# backward sweep: propagate the adjoint variable along the stored trajectory
trajadj = []
xadj, yadj, zadj = 0.0, 10.0, 10.0
trajadj.append((xadj, yadj, zadj))
for i in range(NSTEP - 1, -1, -1):
    x, y, z = traj[i]
    xadj, yadj, zadj = l.adj(x, y, z, xadj, yadj, zadj, DT)
    trajadj.append((xadj, yadj, zadj))
trajadj.reverse()

# adjoint-tangent inner product at every step; it should be constant
aggr = (asarray(trajadj) * asarray(dtraj)).sum(axis=1)
# pylab.plot(aggr)
print(aggr.max() - aggr.min())
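# The lorenz module used above is not shown. Below is a minimal sketch of a
# class with the same interface, assuming a simple forward-Euler
# discretization (the actual module may use a different integrator): step()
# advances the state, diff() applies the Jacobian of step() to a tangent
# perturbation, and adj() applies the transposed Jacobian to an adjoint
# vector, which is what makes the adjoint-tangent inner product constant.
class Lorenz(object):
    def __init__(self, sigma, rho, beta):
        self.sigma, self.rho, self.beta = sigma, rho, beta

    def step(self, x, y, z, dt):
        # one explicit Euler step of the Lorenz equations
        s, r, b = self.sigma, self.rho, self.beta
        return (x + dt * s * (y - x),
                y + dt * (x * (r - z) - y),
                z + dt * (x * y - b * z))

    def diff(self, x, y, z, dx, dy, dz, dt):
        # tangent linear model: Jacobian of step() at (x, y, z) applied to (dx, dy, dz)
        s, r, b = self.sigma, self.rho, self.beta
        return (dx + dt * s * (dy - dx),
                dy + dt * ((r - z) * dx - dy - x * dz),
                dz + dt * (y * dx + x * dy - b * dz))

    def adj(self, x, y, z, xa, ya, za, dt):
        # adjoint model: transpose of the Jacobian of step() applied to (xa, ya, za)
        s, r, b = self.sigma, self.rho, self.beta
        return (xa + dt * (-s * xa + (r - z) * ya + y * za),
                ya + dt * (s * xa - ya + x * za),
                za + dt * (-x * ya - b * za))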
import numpy as np
# Lorenz here is a control-oriented environment class (reset / step / reward /
# max_control); it is assumed to be imported or defined elsewhere.

sigma = 10.0   # assumed: sigma is not set in this snippet; 10.0 matches the scripts above
b = 8.0 / 3.0
r = 1.5
lrz = Lorenz(sigma, b, r)  # initialize Lorenz object with the given parameters
n_samples = 1000           # number of training samples
# initialize the training data (states X and control labels U) to zero
lrz.X, lrz.U = np.zeros((n_samples, 3)), np.zeros((n_samples, 1))

"""
Training:
  - randomly initialize the state of the Lorenz object and store the
    initial state in lrz.X[i, :]
  - take one step with negative control and record the reward r1
  - reset the state back to the starting state, take another step with
    positive control and record the reward r2
  - set the policy label lrz.U[i, 0] to -1 or +1, depending on which
    control gives the larger reward
"""
for i in range(n_samples):
    lrz.X[i, :] = lrz.reset()      # random starting state
    lrz.step(-lrz.max_control)     # try the negative control
    r1 = lrz.reward()
    lrz.state = lrz.X[i, :]        # restore the starting state
    lrz.step(lrz.max_control)      # try the positive control
    r2 = lrz.reward()
    lrz.U[i, 0] = 2 * np.argmax([r1, r2]) - 1  # -1 if r1 is larger, +1 if r2 is larger

data = {
    'sigma': sigma,
    'b': b,
    'r': r,
    'n_samples': n_samples,
    'X': lrz.X,
    'U': lrz.U
}
# write the training data to a file
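# The original snippet stops at the "write a file" comment without showing how
# the data is saved. One possible way (a sketch; the filename below is made
# up, not taken from the original):
import pickle
with open('lorenz_training_data.pkl', 'wb') as f:
    pickle.dump(data, f)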