def __init__(self, X, Y, U, phi=observables.monomials(2), psi=observables.monomials(2), regressor='ols', is_generator=False, p_inv=True):
    """
    Estimate the action-dependent lifted operator tensor from snapshot data.

    The data is lifted with ``phi`` (applied to X and Y) and ``psi`` (applied
    to U). A matrix ``M`` is regressed from the column-wise Kronecker products
    ``kron(psi(u_i), phi(x_i))`` onto ``phi(y_i)`` and then folded into the
    tensor ``K``. A second regression ``B`` maps ``phi(X)`` back onto ``X``.

    Parameters:
        X: Data matrix whose columns are the data points (N = X.shape[1]).
        Y: Data matrix lifted with ``phi`` and used as the regression target
            (presumably the successor states of X; confirm with the caller).
        U: Data matrix lifted with ``psi``; column i is paired with column i
            of X.
        phi: Callable dictionary applied to X and Y.
        psi: Callable dictionary applied to U.
        regressor: 'rrr' or 'sindy' select the matching ``estimate_L``
            routine; any other value falls back to ``estimate_L.ols``.
        is_generator: Currently unused (see the TODO below).
        p_inv: Forwarded to ``estimate_L.ols`` only.

    Attributes set:
        N, Phi_X, Phi_Y, Psi_U, dim_phi, dim_psi,
        kronMatrix: array of shape (dim_psi * dim_phi, N),
        M, B: regression results,
        K: array of shape (dim_phi, dim_phi, dim_psi).
    """
    self.X = X
    self.Y = Y
    self.U = U
    self.phi = phi
    self.psi = psi

    # Get number of data points
    self.N = self.X.shape[1]

    # Construct Phi and Psi matrices
    self.Phi_X = self.phi(X)
    self.Phi_Y = self.phi(Y)
    self.Psi_U = self.psi(U)

    # TODO: Create Koopman Generator Tensor if is_generator is True

    # Get dimensions
    self.dim_phi = self.Phi_X.shape[0]
    self.dim_psi = self.Psi_U.shape[0]

    # Make sure data is full rank
    # checkMatrixRank(self.Phi_X, "Phi_X")
    # checkMatrixRank(self.Phi_Y, "Phi_Y")
    # checkMatrixRank(self.Psi_U, "Psi_U")

    # Make sure condition numbers are small
    # checkConditionNumber(self.Phi_X, "Phi_X")
    # checkConditionNumber(self.Phi_Y, "Phi_Y")
    # checkConditionNumber(self.Psi_U, "Psi_U")

    # Build matrix of kronecker products between u_i and x_i for all 0 <= i <= N
    self.kronMatrix = np.empty([self.dim_psi * self.dim_phi, self.N])
    for i in range(self.N):
        self.kronMatrix[:, i] = np.kron(self.Psi_U[:, i], self.Phi_X[:, i])

    # Solve for M and B.
    # The branches must be mutually exclusive: with two independent `if`
    # statements the 'rrr' result was immediately overwritten by the OLS
    # fallback attached to the 'sindy' check.
    if regressor == 'rrr':
        self.M = estimate_L.rrr(self.kronMatrix.T, self.Phi_Y.T).T
        self.B = estimate_L.rrr(self.Phi_X.T, self.X.T)
    elif regressor == 'sindy':
        self.M = estimate_L.SINDy(self.kronMatrix.T, self.Phi_Y.T).T
        self.B = estimate_L.SINDy(self.Phi_X.T, self.X.T)
    else:
        self.M = estimate_L.ols(self.kronMatrix.T, self.Phi_Y.T, p_inv).T
        self.B = estimate_L.ols(self.Phi_X.T, self.X.T, p_inv)

    # reshape M into tensor K: row i of M becomes the (dim_phi, dim_psi)
    # slice K[i], filled in column-major (Fortran) order
    self.K = np.empty([self.dim_phi, self.dim_phi, self.dim_psi])
    for i in range(self.dim_phi):
        self.K[i] = self.M[i].reshape([self.dim_phi, self.dim_psi], order='F')
def fit(self, X, U, timesteps=2, lamb=10):
    """
        Fits a policy pi to the dataset using Koopman RL

        Reads ``self.psi``, ``self.action_bounds`` and ``self.reward``, which
        must already be set on the instance. Stores every intermediate
        quantity on ``self`` and finishes by setting ``self.V`` and
        ``self.pi`` from ``learningAlgorithm``.

        Parameters:
            X: State data
            U: Action data (assumes a 1-D sequence with one entry per column
                of X, since it is appended to X as a single extra row)
            timesteps: Forwarded to ``learningAlgorithm``
            lamb: Forwarded to ``learningAlgorithm``
    """
    self.X = X
    self.U = U

    # Extended states: the actions are stacked under the states as one row.
    extended = np.append(X, [U], axis=0)
    self.X_tilde = extended
    self.d = extended.shape[0]
    self.m = extended.shape[1]

    # Lifted extended states and the derivatives of the dictionary.
    self.Psi_X_tilde = self.psi(extended)
    self.k = self.Psi_X_tilde.shape[0]
    self.nablaPsi = self.psi.diff(extended)
    self.nabla2Psi = self.psi.ddiff(extended)

    # Fill the (k, m) matrix entry by entry via dpsi. Only columns
    # 0..m-2 are written; the last column keeps its zero initialisation
    # (NOTE(review): presumably intentional - confirm against dpsi).
    lifted_derivative = np.zeros((self.k, self.m))
    self.dPsi_X_tilde = lifted_derivative
    last_column = self.m - 1
    for observable_idx in range(self.k):
        for snapshot_idx in range(last_column):
            lifted_derivative[observable_idx, snapshot_idx] = dpsi(
                self.X_tilde,
                self.nablaPsi,
                self.nabla2Psi,
                observable_idx,
                snapshot_idx
            )

    # Regress the lifted derivatives on the lifted states.
    self.L = estimate_L.rrr(self.Psi_X_tilde.T, self.dPsi_X_tilde.T)

    self.z_m = np.zeros((self.k, self.k))
    self.phi_m_inverse = np.linalg.inv(np.identity(self.k))

    learned = learningAlgorithm(
        self.L,
        self.X,
        self.psi,
        self.Psi_X_tilde,
        self.action_bounds,
        self.reward,
        timesteps=timesteps,
        lamb=lamb
    )
    self.V, self.pi = learned
continue Koopman_operators.append( estimate_L.ols(split_datasets[action]['phi_x'].T, split_datasets[action]['phi_x_prime'].T).T) Koopman_operators = np.array(Koopman_operators, dtype=object) #%% Build kronMatrix kronMatrix = np.empty((d_psi * d_phi, N)) for i in range(N): kronMatrix[:, i] = np.kron(Psi_U[:, i], Phi_X[:, i]) #%% Estimate M M = estimate_L.ols(kronMatrix.T, Y.T).T # Phi_Y.T M_2 = estimate_L.SINDy(kronMatrix.T, Y.T).T # Phi_Y.T M_3 = estimate_L.rrr(kronMatrix.T, Y.T).T # Phi_Y.T #%% Reshape M into K tensor K = np.empty((2, d_phi, d_psi)) for i in range(2): K[i] = M[i].reshape((d_phi, d_psi), order='F') K_2 = np.empty((2, d_phi, d_psi)) for i in range(2): K_2[i] = M_2[i].reshape((d_phi, d_psi), order='F') K_3 = np.empty((2, d_phi, d_psi)) for i in range(2): K_3[i] = M_3[i].reshape((d_phi, d_psi), order='F') def K_u(K, u): return np.einsum('ijz,z->ij', K, psi(u))
# Script cell: fit operators on the lifted data and fold M into K tensors.
# All names used here (Phi_XU, XU, Psi_U, Phi_X, Phi_Y, X, N, dim_phi,
# dim_psi, estimate_L) are defined earlier in the script.

# Operator on the joint lifted observables: every column of Phi_XU except the
# last is regressed onto the column that follows it.
extended_koopman_operator = estimate_L.ols(Phi_XU[:, :-1].T, Phi_XU[:, 1:].T).T
# Regression from the joint lifted observables back onto the raw XU data.
extended_B = estimate_L.ols(Phi_XU.T, XU.T)

#%% Build kronMatrix
# Column i holds kron(Psi_U[:, i], Phi_X[:, i]).
kronMatrix = np.empty((dim_psi * dim_phi, N))
for i in range(N):
    kronMatrix[:, i] = np.kron(Psi_U[:, i], Phi_X[:, i])

#%% Estimate M and B matrices
# One reduced-rank fit is stored per rank 1..num_ranks (rank=i + 1 below).
num_ranks = 20 - 1
M = estimate_L.ols(kronMatrix.T, Phi_Y.T).T
M_2 = estimate_L.SINDy(kronMatrix.T, Phi_Y.T).T
M_rrrs = np.empty((num_ranks, dim_phi, dim_phi * dim_psi))
for i in range(num_ranks):
    M_rrrs[i] = estimate_L.rrr(kronMatrix.T, Phi_Y.T, rank=i + 1).T

print("M shape:", M.shape)
assert M.shape == (dim_phi, dim_phi * dim_psi)

# B regresses the lifted states back onto the raw states.
B = estimate_L.ols(Phi_X.T, X.T)
assert B.shape == (dim_phi, X.shape[0])

#%% Reshape M into K tensor
# Row i of M becomes the (dim_phi, dim_psi) slice K[i], filled column-major.
K = np.empty((dim_phi, dim_phi, dim_psi))
for i in range(dim_phi):
    K[i] = M[i].reshape((dim_phi, dim_psi), order='F')

# Same folding for the SINDy estimate.
K_2 = np.empty((dim_phi, dim_phi, dim_psi))
for i in range(dim_phi):
    K_2[i] = M_2[i].reshape((dim_phi, dim_psi), order='F')

# Storage for one K tensor per reduced-rank estimate; it is only allocated
# here and is not filled within this cell.
K_rrrs = np.empty((num_ranks, dim_phi, dim_phi, dim_psi))
num_lifted_state_observations)) for i in range(num_lifted_state_observations): kron = np.kron(Psi_U[:, i], Phi_X[:, i]) psiPhiMatrix[:, i] = kron return psiPhiMatrix psiPhiMatrix = getPsiPhiMatrix(Psi_U, Phi_X) print("PsiPhiMatrix shape:", psiPhiMatrix.shape) # || Y - X B || # || Phi_Y - M PsiPhi || # || Y.T - B.T X.T || # || Phi_Y.T - PsiPhi.T M.T || M = estimate_L.rrr(psiPhiMatrix.T, getPhiMatrix(Y).T).T print("M shape:", M.shape) assert M.shape == (num_lifted_state_features, num_lifted_state_features * num_lifted_action_features) K = np.empty((num_lifted_state_features, num_lifted_state_features, num_lifted_action_features)) for i in range(M.shape[0]): K[i] = M[i].reshape( (num_lifted_state_features, num_lifted_action_features)) print(K.shape) K_u_100 = np.einsum('ijz,z->ij', K, psi(U[:, 100])) assert K_u_100.shape == (num_lifted_state_features, num_lifted_state_features) K_u_alt = K[:, :, 0]
(num_lifted_action_features * num_lifted_state_features, num_lifted_state_observations)) for i in range(num_lifted_state_observations): kron = np.kron(Psi_U[:, i], Phi_X[:, i]) psiPhiMatrix[:, i] = kron return psiPhiMatrix #%% psiPhiMatrix = getPsiPhiMatrix(Psi_U, Phi_X) print("PsiPhiMatrix shape:", psiPhiMatrix.shape) M = estimate_L.ols(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T M_2 = estimate_L.SINDy(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T M_3 = estimate_L.rrr(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T print("M shape:", M.shape) assert M.shape == (num_lifted_state_features, num_lifted_state_features * num_lifted_action_features) K = np.empty((num_lifted_state_features, num_lifted_state_features, num_lifted_action_features)) for i in range(M.shape[0]): K[i] = M[i].reshape( (num_lifted_state_features, num_lifted_action_features), order='F') print("K shape:", K.shape) K_2 = np.empty((num_lifted_state_features, num_lifted_state_features, num_lifted_action_features)) for i in range(M_2.shape[0]): K_2[i] = M_2[i].reshape( (num_lifted_state_features, num_lifted_action_features), order='F')
# return matrix # Psi_X = getPsiMatrix(psi, X_train) # Psi_Y = getPsiMatrix(psi, Y_train) # Psi_X_0 = getPsiMatrix(psi, X_0_train).T # Psi_Y_0 = getPsiMatrix(psi, Y_0_train).T # Psi_X_1 = getPsiMatrix(psi, X_1_train).T # Psi_Y_1 = getPsiMatrix(psi, Y_1_train).T #%% Koopman # || Y - X B || # || Y.T - B.T X.T || # || Psi_Y_0 - K Psi_X_0 || # || Psi_Y_0.T - Psi_X_0.T K.T || K_0 = estimate_L.rrr(Psi_X_0.T, Psi_Y_0.T).T K_1 = estimate_L.rrr(Psi_X_1.T, Psi_Y_1.T).T eigenvalues_0, eigenvectors_0 = np.linalg.eig(K_0) eigenvalues_1, eigenvectors_1 = np.linalg.eig(K_1) eigenfunction_0 = list(map(lambda psi_x: np.dot(psi_x,eigenvectors_0[:,0]), Psi_X_0.T)) eigenfunction_1 = list(map(lambda psi_x: np.dot(psi_x,eigenvectors_1[:,0]), Psi_X_1.T)) plt.plot(eigenvectors_0[:,:3]) plt.plot(eigenvectors_1[:,:3]) plt.title("Eigenvectors of Koopman operator for action 0 and 1") plt.ylabel('Eigenvector Output') plt.xlabel('State Snapshots') plt.show() plt.plot(eigenfunction_0)
def learningAlgorithm(X, psi, Psi_X, action_bounds, reward, timesteps=4, cutoff=8, lamb=10):
    """
    Iteratively re-estimate a value table and the policy density built on it.

    Each of the ``timesteps`` sweeps regresses the current value table on the
    lifted snapshots, builds a normalised density over actions from
    ``exp((reward + Lv_hat) / lamb)``, and overwrites the value of every
    column of ``X`` with an integral over the action interval.

    Relies on names defined outside this function: ``rrr``, ``nb``, ``qp``,
    ``mpexp``, ``ln`` and the arrays ``K``, ``D_y`` and ``F``.

    Parameters:
        X: State snapshots, one per column (d = X.shape[0] rows).
        psi: Dictionary object; only ``psi.diff`` is used here.
        Psi_X: Lifted snapshots with n = Psi_X.shape[0] rows.
        action_bounds: ``(low, high)`` integration limits for the action.
        reward: Callable ``reward(x, u)``.
        timesteps: Number of sweeps over the snapshots.
        cutoff: Unused.
        lamb: Temperature; the exponent is scaled by ``1 / lamb`` and the
            log-density term by ``lamb``.

    Returns:
        ``(currentV, pi_hat_star)``: the value table of shape
        ``(1, X.shape[1])`` and the density ``pi_hat_star(u, x)`` from the
        last sweep. If no sweep runs, the table is all zeros and the policy
        is the one-argument identity placeholder.
    """
    lifted_snapshots = Psi_X.T

    # Placeholder that is returned only when the loop below never executes.
    pi_hat_star = lambda x: x

    # constants
    n, d = Psi_X.shape[0], X.shape[0]
    low, high = action_bounds
    constant = 1 / lamb

    # V^{\pi*_0}
    currentV = np.zeros((1, X.shape[1]))

    sweeps_done = 0
    while sweeps_done < timesteps:
        # Regress a snapshot of the current values on the lifted states; the
        # closures below read B, so they always see this sweep's fit.
        B = rrr(lifted_snapshots, currentV.copy().T)

        @nb.jit(forceobj=True, fastmath=True)
        def Lv_hat(x, u):
            grad_psi = psi.diff(x.reshape(-1, 1)).reshape((n, d))
            # NOTE(review): appending x[0]**2 looks specific to one system's
            # lifted coordinates - confirm against the definitions of K/D_y.
            y = np.append(x, x[0]**2).reshape(-1, 1)
            dy_dt = K @ y + D_y * u
            return ((grad_psi.T @ B).T @ F @ dy_dt)[0, 0]

        @nb.jit(forceobj=True, fastmath=True)
        def unnormalised_density(u, x):
            return mpexp(constant * (reward(x, u) + Lv_hat(x, u)))

        def pi_hat_star(u, x):  # action given state
            return unnormalised_density(u, x) / qp.quad(
                unnormalised_density, low, high, args=(x, )
            )[0]

        def value_integrand(u, x):
            density = pi_hat_star(u, x)
            immediate = reward(x, u)
            log_term = lamb * ln(density)
            drift = Lv_hat(x, u)
            return (immediate - log_term + drift) * density

        def state_value(x):
            return qp.quad(value_integrand, low, high, args=(x, ))[0]

        # Values are overwritten in place, column by column.
        for column in range(currentV.shape[1]):
            currentV[:, column] = state_value(X[:, column])
            processed = column + 1
            if processed % 250 == 0:
                print(processed)

        sweeps_done += 1
        print("Completed learning step", sweeps_done)

    return currentV, pi_hat_star