def get_random_data_pairs(n, visual=False, stable_equ=False, ext=False, quasi=False, f=False, noise=None): # generates n random state initialisations and their change in state # stable_equ if generation is only about theta = pi # returns 4xn matrices xs and ys for the initial state and change in state respectivvly system = CartPole(visual) # initial_state = random_init(initial_state,stable_equ) # system.setState(initial_state) v = 4 force = 0 if f: v = 5 initial_state = np.zeros(v) xs = np.zeros((v, n)) ys = np.zeros((4, n)) seed = 1 for i in range(n): if quasi == False: initial_state = random_init(initial_state, stable_equ, ext, f=f) else: initial_state, seed = quasi_init(seed, f=f) system.setState(initial_state[0:4]) if f: force = initial_state[4] xs[:, i], ys[:, i] = get_xy_pair(0, system, f=f, force=force, noise=noise) ## each column is a sample return xs, ys
def test_model(c, n, visual=False, loop=False, osc=False, stable_equ=False, model=0, f=False): # c is 4x4 linear coefficent matrix, or contais alpha and basis location for non linear model # n is number of time iterations to predict # stable_equ if motion is about theta = pi # plots real state trajectory and linear model predictions # model = 0 if linear model #modoel = 1 if non linear model v = 4 if f: v = 5 system = CartPole(visual) initial_state = np.zeros(v) initial_state = random_init(initial_state, stable_equ, f=f) initial_state[0] = 0 if loop == True: initial_state = np.array([0, 0, np.pi, 15]) elif osc == True: initial_state = np.array([0, 0, np.pi, 5]) force = 0 if f: force = initial_state[4] system.setState(initial_state[0:4]) state_history = np.empty((v, n)) state_history[:, 0] = initial_state[0:v] for i in range(n - 1): # update dynamics n times system.performAction(force) ## runs for 0.1 secs with no force system.remap_angle() state_history[0:4, i + 1] = system.getState() if f: state_history[4] = force #time = np.arange(0, (n ) * 0.1, 0.1) prediction = state_prediction(c, n, initial_state, model, f=f) for i in range(4): plt.subplot(2, 2, i + 1) plt.subplots_adjust(hspace=0.3) plt.plot(state_history[i, :], linestyle=':', label='Actual') plt.plot(prediction[i, :], label='Predicted') plt.xlabel('Iteration') plt.title(labels[i]) plt.legend(loc='upper right') if i != 2: plt.ylim = (np.min(state_history[i, :]) - 0.5 * np.min(state_history[i, :]), np.max(state_history[i, :]) + 0.5 * np.max(state_history[i, :])) # if i !=0 and i!=2: # plt.ylim(-20, 20) plt.show()
def rollout(max_t, init_state, p, model=None): steps = int(max_t / cartpole1.delta_time) # 0.2s per step Xn = init_state[:-1] Xn_new = Xn if model == None: cartpole = CartPole() X_cartpole = [Xn[:-1]] L_model = 0 for i in range(steps): Xn = Xn_new # change the action term according to the policy cartpole.setState(Xn[:4]) action = np.dot(p, Xn) cartpole.performAction(action) cartpole.remap_angle() Xn_new = np.array(cartpole.getState()) X_cartpole.append(Xn_new) L_model += loss(Xn_new) return X_cartpole[:-1], L_model else: X_model = [Xn] L_model = 0 for i in range(steps): Xn = Xn_new # change the action term according to the policy Xn[-1] = np.dot(p, Xn) Xn = Xn.reshape(1, Xn.shape[0]) Yn = predict(Xn, XM, sigma, alpha) Yn.resize(Xn.shape) Xn_new = Xn + Yn Xn_new = np.array(Xn_new[0]) Xn_new[2] = remap_angle(Xn_new[2]) X_model.append(Xn_new) L_model += loss(Xn_new[:-1]) return X_model[:-1], L_model
def rolloutL(p): max_t = 15 state1 = np.array([0, 0, 0.3, 0, 0]) state2 = np.array([0, 0, 0.15, 0, 0]) init_state = state1 cartpole = CartPole() steps = int(max_t / cartpole.delta_time) # 0.2s per step Xn = init_state[:-1] Xn_new = Xn L_model = 0 for i in range(steps): Xn = Xn_new cartpole.setState(Xn[:4]) action = np.dot(p, Xn) cartpole.performAction(action) cartpole.remap_angle() Xn_new = np.array(cartpole.getState()) n1 = np.random.normal(0, 0.1, 1)[0] n2 = np.random.normal(0, 0.1, 1)[0] n3 = np.random.normal(0, 0.05, 1)[0] n4 = np.random.normal(0, 0.1, 1)[0] noise = np.array([n1, n2, n3, n4]) Xn_new += noise L_model += loss(Xn_new) return L_model
def loss_function(p): model = non_linear_model(10, f=True) #model.read_from_file('m=500n=10000') model.read_from_file('m=1000n=10000') loss = 0 #initial =np.array([random.normalvariate(0,0.03),random.normalvariate(0,0.03),random.normalvariate(0,0.03),random.normalvariate(0,0.03)]) #+ random.normalvariate(0,0.01) initial = np.array([0, 0.2, 0.05, -0.2]) #*10 n = 20 loss = loss + loss_pos(initial) x = np.zeros((4, 1)) x[:, 0] = initial[0:4] x_extended = np.zeros((5)) system = CartPole(visual=False) system.setState(initial) for i in range(n): # # using non linear modeelling for training change = np.zeros((1, 4)) if isinstance(p, (list, tuple, np.ndarray)): force = np.matmul(p, x) else: force = np.matmul(p._value, x) x_extended[0:4] = x[:, 0] x_extended[4] = force test = np.matmul(model.alpha.T, model.transform_x(x_extended)) x = x + test # x = system.getState() # # if isinstance(p, (list, tuple, np.ndarray)): # force = np.matmul(p,x) # else: # force = np.matmul(p._value, x) # # #force = np.matmul(p, x) # system.performAction(force) # x= system.getState() x[2] = remap_angle(x[2]) loss += loss_pos(x) return loss
def rolloutL(p): max_t = 8 state1 = np.array([0, 0, 0.2, 0, 0]) state2 = np.array([0, 0, 0.15, 0, 0]) init_state = state1 cartpole = CartPole() steps = int(max_t / cartpole.delta_time) # 0.2s per step Xn = init_state[:-1] Xn_new = Xn L_model = 0 for i in range(steps): Xn = Xn_new cartpole.setState(Xn[:4]) action = np.dot(p, Xn) cartpole.performAction(action) cartpole.remap_angle() Xn_new = np.array(cartpole.getState()) L_model += cartpole.loss() return L_model
def rolloutp(max_t, init_state, p): steps = int(max_t / cartpole1.delta_time) # 0.2s per step Xn = init_state[:-1] Xn_new = Xn cartpole = CartPole() X_cartpole = [Xn] L_model = 0 for i in range(steps): Xn = Xn_new cartpole.setState(Xn[:4]) action = np.dot(p, Xn) cartpole.performAction(action) cartpole.remap_angle() Xn_new = cartpole.getState() X_cartpole.append(np.array(Xn_new)) L_model += loss(Xn_new) X_cartpole = np.array(X_cartpole) return X_cartpole[:-1], L_model
def gen_train(N, s): """try generate the dataset and add noise together""" cartpole1 = CartPole() X = [] # true value for current state XX = [] # true value for next state X_wn = [] # observed current state Y = [] # true change in state Y_wn = [] # observed change in state for i in range(N): x = np.random.uniform([-5, 5, 1])[0] x_dot = np.random.uniform([-10, 10, 1])[0] theta = np.random.uniform([-np.pi, np.pi, 1])[0] theta_dot = np.random.uniform([-15, 15, 1])[0] Xn = np.array([x, x_dot, theta, theta_dot]) X.append(Xn) cartpole1.setState(Xn) cartpole1.performAction() cartpole1.remap_angle() Xn_1 = np.array(cartpole1.getState()) XX.append(Xn_1) Y.append(Xn_1 - Xn) X = np.array(X) Xnoise = np.random.normal(0, s, size=(N, 4)) X_wn = X + Xnoise XXnoise = np.random.normal(0, s, size=(N, 4)) XX_wn = XX + XXnoise Y = np.array(Y) Y_wn = np.array(XX_wn - X_wn) # coef = lin_reg(X,Y) # coef_wn = lin_reg(X_wn,Y_wn) reg1 = LinearRegression().fit(X, Y) reg2 = LinearRegression().fit(X_wn, Y_wn) inter1 = reg1.intercept_ inter2 = reg2.intercept_ coef = reg1.coef_ coef_wn = reg2.coef_ # print(coef) # print(coef_wn) Y_predict = np.matmul(X, coef) + inter1 Y_predict = np.array(Y_predict) Y_predict_wn = np.matmul(X, coef_wn) + inter2 Y_predict_wn = np.array(Y_predict_wn) return mse(Y_predict, Y), mse(Y_predict_wn, Y)
def scan_step(to_scan,n,visual=False,f=False): # scnes through one state variable with n points scan variable given by to_scan = [0,3] # plots the next state vector system = CartPole(visual) initial_state = np.zeros(4) initial_state = random_init(initial_state,f=f) initial_state[to_scan] = 0 - ranges[to_scan]/2 ##makes it scan from the start of a variables range scanned_values = np.zeros(n) reached_states = np.zeros((n,4)) step = ranges[to_scan]/n for i in range(n): ##number of scanning variables x = initial_state.copy() ##takes a copy of this state x[to_scan] = x[to_scan]+ i * step ##changes one state variable system.setState(x) if to_scan == 2: system.remap_angle() ##remaps the angle x[to_scan] = remap_angle(x[to_scan]) scanned_values[i] = x[to_scan] system.performAction(x[4]) reached_states[i,:] = system.getState() plt.plot(scanned_values, reached_states[:, 0], Label='Position') plt.plot(scanned_values, reached_states[:, 1], Label='Velocity') plt.plot(scanned_values, reached_states[:, 2], Label='Angle') plt.plot(scanned_values, reached_states[:, 3], Label='Angular Velocity') plt.xlabel(labels[to_scan]) plt.ylabel('Reached state') plt.legend(loc='upper left') plt.title( 'Initial state: ' + str(np.round(initial_state,2)) + ' Scan through ' + labels[to_scan] ) plt.show()
def modelL(p): max_t = 5 state1 = np.array([0, 0, 0.1, 0, 0]) state2 = np.array([0, 0, 0.3, 0, 0]) init_state = state1 cartpole = CartPole() steps = int(max_t / cartpole.delta_time) # 0.2s per step Xn = init_state Xn_new = Xn L_model = 0 for i in range(steps): Xn = Xn_new # change the action term according to the policy Xn[-1] = np.dot(p, Xn[:-1]) Xn = Xn.reshape(1, Xn.shape[0]) Yn = predict(Xn, XM, sigma, alpha) Yn.resize(Xn.shape) Xn_new = Xn + Yn Xn_new = np.array(Xn_new[0]) Xn_new[2] = remap_angle(Xn_new[2]) L_model += loss(Xn_new) return L_model
def non_linear_loss_function(p): model = non_linear_model(10, f=True) #model.read_from_file('m=500n=10000') model.read_from_file('m=1000n=10000') policy = non_linear_model(10, policy=True) #policy.read_from_file('nonzero') ### change at somepoint so that sd's are part of the p input? #policy.read_from_file('justtop') policy.read_from_file('wholetest') # policy.read_from_file('flickup') #different optmisation situations optimise_loc = False sym = False top_constant = False bot_constant = False bounce_back = True if optimise_loc == True: m = int((p.shape[0] - 2) / 4) #policy.m #policy = non_linear_model(m,policy=True) #policy.sd = np.ones((4,m)) policy.basis[:, 0] = p[:4].T policy.basis[:, 1] = -policy.basis[:, 0] policy.basis[:, 2] = p[4:4 + 4].T policy.basis[:, 3] = -policy.basis[:, 2] p = [-27.669, 27.669, -10.98, 10.98] policy.sd[0, :] = [30, 30, 30, 30] policy.sd[1, :] = [30, 30, 30, 30] if sym: policy.basis[:, 0] = p[2:6].T policy.basis[:, 1] = -policy.basis[:, 0] policy.basis[:, 2] = p[6:10].T policy.basis[:, 3] = -policy.basis[:, 2] #print(policy.basis) p = [p[0], -p[0], p[1], -p[1]] if top_constant: p = np.array([-27.669, 27.669, -10.98, 10.98, p[0], p[1], 0, p[2]]) #for updating 3 on lhs if bot_constant: p = [p[0], -p[0], p[1], -p[1], 76, -77, 0, -37.025] if bounce_back: p = np.array([ -27.669, 27.669, -10.98, 10.98, 76, -77, p[0], -37.025, -p[0], p[1], -p[1] ]) loss = 0 #initial=np.array([0,0,np.pi,0]) #initial = np.array([0, 0.2, 0.05, -0.2])*5 initial = np.array([-5.5, -3.36, -0.56, 0.42]) n = 30 loss = loss + loss_pos(initial) x = np.zeros((4, 1)) x[:, 0] = initial[0:4] x_extended = np.zeros((5)) system = CartPole(visual=False) system.setState(initial) for i in range(n): # # using non linear modeelling for training change = np.zeros((1, 4)) x_policy = policy.transform_x(x) if isinstance(p, (list, tuple, np.ndarray)): force = np.matmul(p, x_policy) else: force = np.matmul(p._value, x_policy) x_extended[0:4] = x[:, 0] x_extended[4] = force test = np.matmul(model.alpha.T, model.transform_x(x_extended)) x = x + test x[2] = remap_angle(x[2]) # x = system.getState() # x_policy = policy.transform_x(x) # if isinstance(p, (list, tuple, np.ndarray)): # force = np.matmul(p,x_policy) # else: # force = np.matmul(p._value, x_policy) # # #force = np.matmul(p, x) # system.performAction(force) # x= system.getState() loss += loss_pos(x) return loss
def test_policy(p, n, visible=False, sd=0.03, bottom=False, nlm=None, Top=False): system = CartPole(visual=visible) #system.setState(np.array([0,0.01,-0.01,-0.01])) system.setState( np.array([ random.normalvariate(0, sd), random.normalvariate(0, sd), random.normalvariate(0, sd), random.normalvariate(0, sd) ])) if nlm != None and Top == False: bottom = True if bottom: system.setState(np.array([0, 0, np.pi, 0])) #sd=1 #system.setState(np.array([random.normalvariate(0, sd), random.normalvariate(0, sd), np.pi ,random.normalvariate(0, sd)])) #system.setState(np.array([-5.5,-3.36,-0.56,0.42])) state_history = np.empty((n, 4)) #system.setState(np.array([0, 0, 0,5.1])) #system.setState(np.array([0, 0.2, 0.05, -0.2])*5) # system.setState(np.array([0, 0.2, 1, -4]) ) #get it to 1 rad at -4rads^1 and itll settle for i in range(n): x = system.getState() if x.ndim == 1: state_history[i, :] = x else: state_history[i, :] = x.reshape((4)) if nlm == None: force = np.matmul(p, x) else: x_ext = nlm.transform_x(x) force = np.matmul(p, x_ext) #state_history[i,2]=force system.performAction(force) system.remap_angle() plt.plot(state_history[:, 2], label=r'$\theta$') plt.plot(state_history[:, 1], label='v') plt.plot(state_history[:, 3], label=r'$\dot{\theta}$') plt.plot(state_history[:, 0], label='x') plt.xlabel('Iteration') #plt.ylabel(r'$\theta$') plt.legend(loc='lower left') plt.title('P=' + str(p)) #plt.ylim([-0.2,0.2]) plt.show()
def loss_trajectory(initial, n, p, model=None, visual=False, nlp=None): loss = 0 if model == None: system = CartPole(visual) system.setState(initial) loss += system.loss() else: loss += loss_pos(initial) x = initial[0:4] x_extended = np.zeros(5) for i in range(n): if model == None: #using model dynamics for initial scans x = system.getState() force = np.matmul(p, x) system.performAction(force) system.remap_angle() loss += system.loss() else: # using non linear modeelling for training if nlp == None: force = np.matmul(p, x) else: force = np.matmul(p, nlp.transform_x(x)) x_extended[0:4] = x x_extended[4] = force change = np.matmul(model.alpha.T, model.transform_x(x_extended)).T x += change x[2] = remap_angle(x[2]) loss += loss_pos(x) return loss
from CartPole import * import numpy as np import matplotlib.pyplot as plt """task1.1 rollout simulation""" cartpole1 = CartPole() cartpole1.delta_time = 0.05 max_t = 5.0 # terminate time for the simulation n = int(max_t / cartpole1.delta_time) # number of iterations for performAction state1 = [0, 1, np.pi, 0] # nonzero cart velocity state2 = [0, 0, np.pi, -14.7] # nonzero angular velocity init_state = state1 cartpole1.setState(state=init_state) # list of variables x, x_dot, theta, theta_dot = [], [], [], [] t = np.arange(0, max_t, cartpole1.delta_time) # simulation for i in range(n): cartpole1.performAction() # cartpole1.remap_angle() current_state = cartpole1.getState() x.append(current_state[0]) x_dot.append(current_state[1]) theta.append(current_state[2]) theta_dot.append(current_state[3]) # plotting the results fig, axs = plt.subplots(2, 2, figsize=(9, 16), constrained_layout=True) axs[0, 0].plot(t, x) axs[0, 0].set_title('cart_location') axs[0, 0].set_xlabel('time (s)') axs[0, 0].set_ylabel('location (m)') axs[0, 1].plot(t, x_dot)
# change the action term according to the policy cartpole.setState(Xn[:4]) action = np.dot(p, Xn) cartpole.performAction(action) cartpole.remap_angle() Xn_new = cartpole.getState() X_cartpole.append(np.array(Xn_new)) L_model += loss(Xn_new) X_cartpole = np.array(X_cartpole) return X_cartpole[:-1], L_model # N = 1000 # NO of datapoints # M = 1000 # NO of data locations for basis function # lam = 10**(-4) # variance of data noise cartpole1 = CartPole() # X = [] # Y = [] # for i in range(N): # x = random.uniform(-5,5) # x_dot = random.uniform(-10,10) # theta = random.uniform(-np.pi,np.pi) # theta_dot = random.uniform(-15,15) # act = random.uniform(-10,10) # Xn = np.array([x,x_dot,theta,theta_dot,act]) # X.append(Xn) # cartpole1.setState(Xn[:-1]) # cartpole1.performAction(action=Xn[-1]) # Xn_1 = np.array(cartpole1.getState()) # Y.append(Xn_1-Xn[:-1]) # X = np.array(X)
def test_force_start(f=0,n=30, all=False): system = CartPole(visual=False) system.setState([0, 0, np.pi, 0]) ## initialise state_history = np.empty((n + 1, 4,n)) for i in range(n): state_history[0, :,i] = [0, 0, np.pi,0] time = np.arange(0, (n + 0.5) * 0.1, 0.1) for fn in range(n): system.setState([0, 0, np.pi, 0]) for i in range(n): # update dynamics n times if i < fn+1: system.performAction(f) ## runs for 0.1 secs with no else: system.performAction(0) system.remap_angle() state_history[i + 1, :,fn] = system.getState()# if fn%3==0: plt.plot(time, state_history[:, 2,fn], Label=str(fn+1) + ' Steps') if all: plt.plot(time, state_history[:, 0, fn], Label=str(fn + 1) + ' Steps-x') plt.plot(time, state_history[:, 1, fn], Label=str(fn + 1) + ' Steps-v') plt.plot(time, state_history[:, 3, fn], Label=str(fn + 1) + r' Steps-$\dot{\theta}$') plt.xlabel('Time') plt.ylabel(r'$\theta$') plt.legend(loc='lower left') plt.title('Constant force effect for start') plt.show()
def predict(X,XM,sigma,alpha): K_MN = kernel(X,XM,sigma) return np.matmul(K_MN,alpha) def l(X,sigma): """X: state vector""" sum = 0 for i,x in enumerate(X): sum += -0.5*np.linalg.norm(x)**2/sigma[i]**2 return 1.0-np.exp(sum) N = 1000 # NO of datapoints M = 800 # NO of data locations for basis function lam = 10**(-4) # variance of data noise cartpole1 = CartPole() # added from 2.2 X = [] Y = [] for i in range(N): x = random.uniform(-5,5) x_dot = random.uniform(-10,10) theta = random.uniform(-np.pi,np.pi) theta_dot = random.uniform(-15,15) act = random.uniform(-20,20) Xn = np.array([x,x_dot,theta,theta_dot,act]) X.append(Xn) cartpole1.setState(Xn[:-1]) cartpole1.performAction(action=Xn[-1]) Xn_1 = np.array(cartpole1.getState())
import numpy as np import matplotlib.tri as tri from CartPole import * # task 1.2 changes of state: model Y = X(T) - X(0) """initialisation""" cartpole1 = CartPole() # scans of variables """setting the state scans""" x_scan = np.linspace(-5, 5, 100) x_dot_scan = np.linspace(-10, 10, 100) theta_scan = np.linspace(np.pi - 0.5, np.pi, 100) theta_dot_scan = np.linspace(10, 15, 100) # """Y plot for x_dot scan""" # Y0 = [] # for x in x_scan: # X = np.array([x,0,np.pi,0]) # cartpole1.setState(X) # cartpole1.performAction() # Y = np.array(cartpole1.getState()) # Y0.append((Y-X)) # Y0 = np.array(Y0) # """Y plot for x_dot scan""" # Y1 = [] # for x_dot in x_dot_scan: # X = np.array([0,x_dot,np.pi,0]) # cartpole1.setState(X) # cartpole1.performAction()
import numpy as np import numpy.random as random from sklearn import linear_model from CartPole import * # from task1_3 import * # linear model Y=CX, X(n+1)=X(n)+CX(n) cartpole1 = CartPole() """generating random states X and corresponding Y""" # X = [] # Y = [] # for i in range(1000): # x = random.uniform([-5,5,1])[0] # x_dot = random.uniform([-10,10,1])[0] # theta = random.uniform([-np.pi,np.pi,1])[0] # theta_dot = random.uniform([-15,15,1])[0] # Xn = np.array([x,x_dot,theta,theta_dot]) # X.append(Xn) # cartpole1.setState(Xn) # cartpole1.performAction() # Xn_1 = np.array(cartpole1.getState()) # Y.append(Xn_1-Xn) # X = np.array(X) # Y = np.array(Y) # """performing linear regression""" # model = linear_model.LinearRegression() # model.fit(X,Y) # change the time step
def rollout (initial_state, n = 100, visual =False,f=0,fn=0): ## takes an intitial state array for the initial cart and angular velocity, ## itialises at stable equilibrium theta = pi , x =0 ## runs for n iterations of euler dynamics each iteration is 0.1 seconds ## no applied force ## plots state variables system = CartPole(visual) system.setState([0, initial_state[0], np.pi, initial_state[1]]) ## initialise state_history= np.empty((n+1,4)) state_history[0,:] = [0, initial_state[0], np.pi, initial_state[1]] for i in range(n): #update dynamics n times if i<fn: system.performAction(f) ## runs for 0.1 secs with no else: system.performAction(0) system.remap_angle() state_history[i+1,:] = system.getState() time=np.arange(0,(n+0.5)*0.1,0.1) plt.plot(time,state_history[:,0], Label = 'Position') plt.plot(time,state_history[:,1], Label = 'Velocity') plt.plot(time,state_history[:,2], Label = 'Angular Position') plt.plot(time,state_history[:,3], Label = 'Angular Velocity') plt.xlabel('Time') plt.legend(loc='upper right') plt.title('Initial Velocity : '+ str(initial_state[0])+ ' , Initial Angular velocity : '+ str(initial_state[1])) plt.show()
def contour_plot(n,x_var, y_var , cont, visual = False , model = None,error=False,f=False): # plots a 3d surface plot and contoru plot where x_var , Y_var are scanned through the suitable range # cont is the variable to be plotted on the contours #n is the number of points to evaluate at in the range # x_var,y_var and cont are either 0,1,2,3 v=4 if f: v=5 system = CartPole(visual) initial_state = np.zeros(v) initial_state = random_init(initial_state,f=f) force=0 if f: force = initial_state[4] scanned_x = np.arange( - ranges[x_var]/2, ranges[x_var]/2, ranges[x_var]/n ) ##scanning variable 1yield scanned_y = np.arange( - ranges[y_var]/2, ranges[y_var]/2, ranges[y_var]/n ) ##scanning variable 1yield X, Y = np.meshgrid(scanned_x, scanned_y) rav_X=np.ravel(X) rav_Y=np.ravel(Y) rav_Z=np.zeros(rav_X.shape[0]) for i in range(rav_X.shape[0]): x = initial_state.copy() x[x_var] = rav_X[i] x[y_var] = rav_Y[i] system.setState(x) system.performAction(force) rav_Z[i] = system.getState()[cont] if error ==True: change = np.zeros((1,v)) change[0,0:4] = np.matmul(model.alpha.T, model.transform_x(x)).T next_state = x + change rav_Z[i] = abs(next_state[0,cont] - rav_Z[i]) Z = rav_Z.reshape(X.shape) colour = 'inferno' fig = plt.figure() ax = fig.add_subplot(111, projection='3d') surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1 , cmap=colour) # Add a color bar which maps values to colors. cbar =fig.colorbar(surf, shrink=0.5, aspect=5) cbar.set_label(labels[cont]) plt.xlabel(labels[x_var]) plt.ylabel(labels[y_var]) ax.set_zlabel(labels[cont]) plt.title('Initial state: ' + str(np.round(initial_state,2)) + '\n Scan through ' + labels[x_var]+ ' and ' + labels[y_var] ) if error == True: ax.set_zlabel('Error in ' + labels[cont]) plt.scatter(model.basis[x_var,:],model.basis[y_var,:],np.ones(model.basis[x_var,:].shape[0])*0.3) else: cont_plot = plt.tricontourf(rav_X, rav_Y, rav_Z, levels=14, cmap=colour, offset = np.amin(rav_Z)) plt.show() cont_plot = plt.tricontourf(rav_X,rav_Y,rav_Z,levels=14, cmap=colour) cbar = plt.colorbar(cont_plot,shrink=0.5, aspect=5) cbar.set_label(labels[cont]) plt.xlabel(labels[x_var]) plt.ylabel(labels[y_var]) plt.title( 'Initial state: ' + str(np.round(initial_state, 2)) + '\n Scan through ' + labels[x_var] + ' and ' + labels[ y_var]) plt.show()
def scan_all(n,type=0,visual=False, c =None,f=False,nlm=False): ##type 0 is a regular scan, type 1 is a change in variable scan ## c iks a 4x4 linear coefficent matrix for plotting model scans # scans through all state variables and plots either the next state or the change in state system = CartPole(visual) if f: initial_state = np.zeros(5) else: initial_state = np.zeros(4) initial_state = random_init(initial_state,f=f) line_lables=[None,None,None,None,None,None,None,None] fig = plt.figure() for to_scan in range(4): plot_no=to_scan scanned_state = np.zeros((4,n)) if nlm: if c.v==5: to_scan+=1 scanned_state = np.zeros((5, n)) initial_state[to_scan] = 0 - ranges[to_scan]/2 ##makes it scan from the start of a variables range scanned_values = np.zeros(n) reached_states = np.zeros((n,4)) Y = np.zeros((n,4)) step = ranges[to_scan]/n for i in range(n): ##number of scanning variables x = initial_state.copy() ##takes a copy of this state x[to_scan] = x[to_scan]+ i * step ##changes one state variable system.setState(x) if to_scan == 2: system.remap_angle() ##remaps the angle x[to_scan] = remap_angle(x[to_scan]) scanned_values[i] = x[to_scan] scanned_state[:,i] = x if f: system.performAction(x[4]) else: system.performAction(0) reached_states[i,:] = system.getState() Y[i,:] = reached_states[i,:] - x[:4] fig.add_subplot(2,2,plot_no+1) plt.subplots_adjust(hspace=0.3) if plot_no == 3: #add labels line_lables=['x','v',r"$\theta$",r"$\dot{\theta}$",'Predicted x','Predicted v',r"Predicted $\theta$",r" Predicted $\dot{\theta}$"] if type == 0 : #plot regular scan plt.plot(scanned_values, reached_states[:, 0], Label=line_lables[0]) plt.plot(scanned_values, reached_states[:, 1], Label=line_lables[1]) plt.plot(scanned_values, reached_states[:, 2], Label=line_lables[2]) plt.plot(scanned_values, reached_states[:, 3], Label=line_lables[3]) plt.ylabel('Reached state') elif type ==1: #plot change if np.any(c)!=None: if nlm: scanned_state=c.transform_x(scanned_state) predictions = np.matmul(c.alpha.T,scanned_state) else: predictions = np.matmul(c,scanned_state) plt.plot(scanned_values[:], predictions[0, :],linestyle=':',color= 'b', Label=line_lables[4]) plt.plot(scanned_values[:], predictions[1, :],linestyle=':',color= 'orange', Label=line_lables[5]) plt.plot(scanned_values[:], predictions[2, :],linestyle=':',color= 'g', Label=line_lables[6]) plt.plot(scanned_values[:], predictions[3, :],linestyle=':',color= 'r', Label=line_lables[7]) plt.plot(scanned_values[:], Y[:, 0],color= 'b', Label=line_lables[0]) plt.plot(scanned_values[:], Y[:, 1],color= 'orange', Label=line_lables[1]) plt.plot(scanned_values[:], Y[:, 2],color= 'g', Label=line_lables[2]) plt.plot(scanned_values[:], Y[:, 3],color= 'r', Label=line_lables[3]) plt.ylabel('Change in State Variable') labels = [ "Position" , "Velocity" , "Angle" , "Angular Velocity", "Force"] plt.xlabel(labels[to_scan]) #plt.legend(loc='upper left') plt.title( 'Initial state: ' + str(np.round(initial_state,2)) + ' Scan through ' + labels[to_scan] ) fig.legend(loc='lower center', ncol=4) plt.show()
return np.matmul(K_MN, alpha) def l(X, sigma): """X: state vector""" sum = 0 for i, x in enumerate(X): sum += -0.5 * np.linalg.norm(x)**2 / sigma[i]**2 return 1.0 - np.exp(sum) # generating dataset N = 1000 # NO of datapoints M = 640 # NO of data locations for basis function lam = 10**(-4) # variance of data noise cartpole1 = CartPole() X = [] Y = [] for i in range(N): x = random.uniform(-5, 5) x_dot = random.uniform(-10, 10) theta = random.uniform(-np.pi, np.pi) theta_dot = random.uniform(-15, 15) act = random.uniform(-20, 20) Xn = np.array([x, x_dot, theta, theta_dot, act]) X.append(Xn) cartpole1.setState(Xn[:-1]) cartpole1.performAction(action=Xn[-1]) Xn_1 = np.array(cartpole1.getState()) Y.append(Xn_1 - Xn[:-1]) X = np.array(X)
import CartPole CartPole.CartPole(Train=False, epoch=1000, game_step=1000, gradient_update=10, learning_rate=0.01, discount_factor=0.95, save_weight=100, save_path="CartPole", rendering=False).Graph()
from CartPole import * # linear model Y=CX def lin_reg(X, Y): X = np.matrix(X) XT = np.matrix.transpose(X) Y = np.matrix(Y) XT_X = np.matmul(XT, X) XT_Y = np.matmul(XT, Y) betas = np.matmul(np.linalg.inv(XT_X), XT_Y) return betas cartpole1 = CartPole() """generating random states X and corresponding Y""" X = [] Y = [] N = 1000 # no of datapoints for i in range(N): x = random.uniform([-5, 5, 1])[0] x_dot = random.uniform([-10, 10, 1])[0] theta = random.uniform([-np.pi, np.pi, 1])[0] theta_dot = random.uniform([-15, 15, 1])[0] Xn = np.array([x, x_dot, theta, theta_dot]) X.append(Xn) cartpole1.setState(Xn) cartpole1.performAction() cartpole1.remap_angle() Xn_1 = np.array(cartpole1.getState())
os.environ["CUDA_VISIBLE_DEVICES"] = '0' # 현재 사용하고 있는 GPU 번호를 얻기 위한 코드 - 여러개의 GPU를 쓸 경우 정보 확인을 위해! print("<<< Ubuntu Terminal 창에서 지정해준 경우, 무조건 GPU : 1대, GPU 번호 : 0 라고 출력 됨 >>>") local_device_protos = device_lib.list_local_devices() GPU_List = [x.name for x in local_device_protos if x.device_type == 'GPU'] # gpu_number_list = [] print("<<< # 사용 가능한 GPU : {} 대 >>>".format(len(GPU_List))) print("<<< # 사용 가능한 GPU 번호 : >>>", end="") for i, GL in enumerate(GPU_List): num = GL.split(":")[-1] # gpu_number_list.append(num) if len(GPU_List) - 1 == i: print(" " + num) else: print(" " + num + ",", end="") CP = CartPole(model_name="CartPole", epoch=1000, gradient_update=10, learning_rate=0.01, training_display=True, SaveGameMovie=True, discount_factor=0.95, save_weight=100, save_path="CartPole", only_draw_graph=False) #CP.train #학습 CP.test # 테스트
import numpy as np from CartPole import * # task 1.2 changes of state """initialisation""" cartpole1 = CartPole() """setting the state scans""" x_scan = np.linspace(-5,5,100) x_dot_scan = np.linspace(-10,10,100) theta_scan = np.linspace(np.pi-0.5,np.pi,100) theta_dot_scan = np.linspace(10,15,100) """Y plot for x scan""" Y0 = [] for x in x_scan: X = [x,2,np.pi-1,5] cartpole1.setState(X) cartpole1.performAction() Y = cartpole1.getState() Y0.append(Y) Y0 = np.array(Y0) """Y plot for x_dot scan""" Y1 = [] for x_dot in x_dot_scan: X = [0,x_dot,np.pi-1,5] cartpole1.setState(X) cartpole1.performAction() Y = cartpole1.getState()