def simulate(): # dynfun = dynamics(stochastic=False) dynfun = dynamics(stochastic=True) # uncomment for stochastic dynamics costfun = cost() T = 100 # episode length N = 100 # number of episodes gamma = 0.95 # discount factor A = dynfun.A B = dynfun.B Q = costfun.Q R = costfun.R L, P = Riccati(A, B, Q, R) total_costs = [] for n in range(N): costs = [] x = dynfun.reset() for t in range(T): # policy u = (-L @ x) # get reward c = costfun.evaluate(x, u) costs.append((gamma**t) * c) # dynamics step x = dynfun.step(u) total_costs.append(sum(costs)) return np.mean(total_costs)
from model import dynamics, cost import numpy as np dynfun = dynamics(stochastic=False) # dynfun = dynamics(stochastic=True) # uncomment for stochastic dynamics costfun = cost() T = 100 # episode length N = 100 # number of episodes gamma = 0.95 # discount factor # Riccati recursion def Riccati(A,B,Q,R): # TODO implement infinite horizon riccati recursion #matlab code that works # P_current = Q; # P_new = P_current; # L_current = zeros(size(Q,1), size(R,2)); %general form, in ex. creates 4x1 zeros matrix # L_new = L_current; # firstIt = true; # while (norm(L_new - L_current, 2) >= 1e-4) || (firstIt == true) # if firstIt == true # firstIt = false; # end # L_current = L_new; # P_current = P_new; # L_new = -inv(R + B'*P_current*B)*(B'*P_current*A); # P_new = Q + L_new'*R*L_new + (A + B*L_new)'*P_current*(A + B*L_new);
#%% from model import dynamics, cost import numpy as np from copy import deepcopy import matplotlib.pyplot as plt from tqdm import tqdm stochastic_dynamics = False # set to True for stochastic dynamics dynfun = dynamics(stochastic=stochastic_dynamics) costfun = cost() T = 100 # episode length N = 100 # number of episodes gamma = 0.95 # discount factor total_costs = [] total_loss = [] # Riccati recursion def Riccati(A,B,Q,R): # implement infinite horizon riccati recursion P = np.zeros(np.shape(A)) P_k1 = np.ones(np.shape(A)) while np.amax(np.absolute(P_k1-P)) > 1e-1: # print(np.amax(np.absolute(P_k1-P))) P_k1 = deepcopy(P) L = -np.linalg.inv(R + B.T.dot(P_k1).dot(B)).dot(B.T).dot(P_k1).dot(A) P = Q + A.T.dot(P_k1).dot(A + B.dot(L)) P *= gamma