Example #1
    def __init__(self,
                 X,
                 Y,
                 U,
                 phi=observables.monomials(2),
                 psi=observables.monomials(2),
                 regressor='ols',
                 is_generator=False,
                 p_inv=True):
        self.X = X
        self.Y = Y
        self.U = U
        self.phi = phi
        self.psi = psi

        # Get number of data points
        self.N = self.X.shape[1]

        # Construct Phi and Psi matrices
        self.Phi_X = self.phi(X)
        self.Phi_Y = self.phi(Y)
        self.Psi_U = self.psi(U)

        # TODO: Create Koopman Generator Tensor if is_generator is True

        # Get dimensions
        self.dim_phi = self.Phi_X.shape[0]
        self.dim_psi = self.Psi_U.shape[0]

        # Make sure data is full rank
        # checkMatrixRank(self.Phi_X, "Phi_X")
        # checkMatrixRank(self.Phi_Y, "Phi_Y")
        # checkMatrixRank(self.Psi_U, "Psi_U")

        # Make sure condition numbers are small
        # checkConditionNumber(self.Phi_X, "Phi_X")
        # checkConditionNumber(self.Phi_Y, "Phi_Y")
        # checkConditionNumber(self.Psi_U, "Psi_U")

        # Build matrix of Kronecker products between u_i and x_i for all 0 <= i < N
        self.kronMatrix = np.empty([self.dim_psi * self.dim_phi, self.N])
        for i in range(self.N):
            self.kronMatrix[:, i] = np.kron(self.Psi_U[:, i], self.Phi_X[:, i])

        # Solve for M and B using the selected regressor
        if regressor == 'rrr':
            self.M = estimate_L.rrr(self.kronMatrix.T, self.Phi_Y.T).T
            self.B = estimate_L.rrr(self.Phi_X.T, self.X.T)
        elif regressor == 'sindy':
            self.M = estimate_L.SINDy(self.kronMatrix.T, self.Phi_Y.T).T
            self.B = estimate_L.SINDy(self.Phi_X.T, self.X.T)
        else:  # default: ordinary least squares (optionally via pseudo-inverse)
            self.M = estimate_L.ols(self.kronMatrix.T, self.Phi_Y.T, p_inv).T
            self.B = estimate_L.ols(self.Phi_X.T, self.X.T, p_inv)

        # reshape M into tensor K
        self.K = np.empty([self.dim_phi, self.dim_phi, self.dim_psi])
        for i in range(self.dim_phi):
            self.K[i] = self.M[i].reshape([self.dim_phi, self.dim_psi],
                                          order='F')
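The constructor above ends by folding the regression coefficients M into a third-order tensor K. The following standalone sketch reproduces that construction on synthetic data using only NumPy: np.linalg.lstsq stands in for estimate_L.ols (an assumption about that helper), and the random Phi_X, Psi_U, and K_true values exist purely to check that the kron-then-reshape round trip recovers the generating tensor.

import numpy as np

rng = np.random.default_rng(0)

# Toy dimensions: 3 lifted state features, 2 lifted action features, 100 snapshots
dim_phi, dim_psi, N = 3, 2, 100
Phi_X = rng.standard_normal((dim_phi, N))
Psi_U = rng.standard_normal((dim_psi, N))

# Generate Phi_Y from a known tensor so the recovery can be verified
K_true = rng.standard_normal((dim_phi, dim_phi, dim_psi))
Phi_Y = np.stack([np.einsum('ijz,z,j->i', K_true, Psi_U[:, i], Phi_X[:, i])
                  for i in range(N)], axis=1)

# Same steps as the constructor: kron features, least squares for M, reshape into K
kronMatrix = np.stack([np.kron(Psi_U[:, i], Phi_X[:, i]) for i in range(N)], axis=1)
M = np.linalg.lstsq(kronMatrix.T, Phi_Y.T, rcond=None)[0].T
K = np.stack([M[i].reshape((dim_phi, dim_psi), order='F') for i in range(dim_phi)])

assert np.allclose(K, K_true)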
Example #2
    def fit(self, X, U, timesteps=2, lamb=10):
        """
        Fits a policy pi to the dataset using Koopman RL

            Parameters:
                X: State data
                U: Action data

        """
        self.X = X
        self.U = U
        # self.min_action = np.min(U)
        # self.max_action = np.max(U)

        self.X_tilde = np.append(X, [U], axis=0) # extended states
        self.d = self.X_tilde.shape[0]
        self.m = self.X_tilde.shape[1]
        # self.s = int(self.d*(self.d+1)/2) # number of second order poly terms
        
        self.Psi_X_tilde = self.psi(self.X_tilde)
        # self.Psi_X_tilde_T = Psi_X_tilde.T
        self.k = self.Psi_X_tilde.shape[0]
        self.nablaPsi = self.psi.diff(self.X_tilde)
        self.nabla2Psi = self.psi.ddiff(self.X_tilde)

        # Note: the loop below fills columns 0..m-2, leaving the final column at zero
        self.dPsi_X_tilde = np.zeros((self.k, self.m))
        for row in range(self.k):
            for column in range(self.m-1):
                self.dPsi_X_tilde[row, column] = dpsi(
                    self.X_tilde, self.nablaPsi,
                    self.nabla2Psi, row, column
                )
        # self.dPsi_X_tilde_T = dPsi_X_tilde.T

        # L = rrr(Psi_X_tilde_T, dPsi_X_tilde_T)
        self.L = estimate_L.rrr(self.Psi_X_tilde.T, self.dPsi_X_tilde.T)
        # self.L = estimate_L.rrr(self.Psi_X_tilde, self.dPsi_X_tilde)

        self.z_m = np.zeros((self.k, self.k))
        self.phi_m_inverse = np.identity(self.k)  # the inverse of the identity is the identity

        self.V, self.pi = learningAlgorithm(
            self.L, self.X, self.psi, self.Psi_X_tilde,
            self.action_bounds, self.reward,
            timesteps=timesteps, lamb=lamb
        )
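Below is a minimal sketch of the generator regression performed in fit, assuming estimate_L.rrr behaves like a (reduced-rank) least-squares fit; np.linalg.lstsq is used in its place on synthetic data, and the toy dimensions are arbitrary. It also makes the orientation explicit: solving dPsi.T ≈ Psi.T @ L means the recovered matrix is the transpose of the operator that maps Psi to dPsi.

import numpy as np

rng = np.random.default_rng(1)

k, m = 5, 200                          # lifted dimension, number of snapshots
Psi = rng.standard_normal((k, m))      # plays the role of Psi_X_tilde
L_true = rng.standard_normal((k, k))
dPsi = L_true @ Psi                    # plays the role of dPsi_X_tilde

# Solve dPsi.T ≈ Psi.T @ L column-wise, as in estimate_L.rrr(Psi_X_tilde.T, dPsi_X_tilde.T)
L = np.linalg.lstsq(Psi.T, dPsi.T, rcond=None)[0]
assert np.allclose(L.T, L_true)        # L.T @ Psi reproduces dPsi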
Example #3
        continue

    Koopman_operators.append(
        estimate_L.ols(split_datasets[action]['phi_x'].T,
                       split_datasets[action]['phi_x_prime'].T).T)
Koopman_operators = np.array(Koopman_operators, dtype=object)

#%% Build kronMatrix
kronMatrix = np.empty((d_psi * d_phi, N))
for i in range(N):
    kronMatrix[:, i] = np.kron(Psi_U[:, i], Phi_X[:, i])

#%% Estimate M
M = estimate_L.ols(kronMatrix.T, Y.T).T  # Phi_Y.T
M_2 = estimate_L.SINDy(kronMatrix.T, Y.T).T  # Phi_Y.T
M_3 = estimate_L.rrr(kronMatrix.T, Y.T).T  # Phi_Y.T

#%% Reshape M into K tensor
K = np.empty((2, d_phi, d_psi))
for i in range(2):
    K[i] = M[i].reshape((d_phi, d_psi), order='F')
K_2 = np.empty((2, d_phi, d_psi))
for i in range(2):
    K_2[i] = M_2[i].reshape((d_phi, d_psi), order='F')
K_3 = np.empty((2, d_phi, d_psi))
for i in range(2):
    K_3[i] = M_3[i].reshape((d_phi, d_psi), order='F')


def K_u(K, u):
    """Contract the Koopman tensor K with the lifted action psi(u) to get the action-conditioned Koopman matrix."""
    return np.einsum('ijz,z->ij', K, psi(u))
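The K_u helper is just a partial contraction of M: applying K_u(K, u) to phi(x) gives the same result as applying M to kron(psi(u), phi(x)). The sketch below checks that identity on random matrices (M_toy, phi_x, psi_u are synthetic placeholders; only NumPy is used).

import numpy as np

rng = np.random.default_rng(2)
d_phi, d_psi = 4, 3
M_toy = rng.standard_normal((d_phi, d_phi * d_psi))
K_toy = np.stack([M_toy[i].reshape((d_phi, d_psi), order='F') for i in range(d_phi)])

phi_x = rng.standard_normal(d_phi)
psi_u = rng.standard_normal(d_psi)

lhs = np.einsum('ijz,z->ij', K_toy, psi_u) @ phi_x   # K_u(K, u) @ phi(x)
rhs = M_toy @ np.kron(psi_u, phi_x)                  # M applied to the kron feature
assert np.allclose(lhs, rhs)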
Example #4
extended_koopman_operator = estimate_L.ols(Phi_XU[:, :-1].T, Phi_XU[:, 1:].T).T
extended_B = estimate_L.ols(Phi_XU.T, XU.T)

#%% Build kronMatrix
kronMatrix = np.empty((dim_psi * dim_phi, N))
for i in range(N):
    kronMatrix[:, i] = np.kron(Psi_U[:, i], Phi_X[:, i])

#%% Estimate M and B matrices
num_ranks = 19  # ranks 1 through 19 are evaluated below

M = estimate_L.ols(kronMatrix.T, Phi_Y.T).T
M_2 = estimate_L.SINDy(kronMatrix.T, Phi_Y.T).T
M_rrrs = np.empty((num_ranks, dim_phi, dim_phi * dim_psi))
for i in range(num_ranks):
    M_rrrs[i] = estimate_L.rrr(kronMatrix.T, Phi_Y.T, rank=i + 1).T
print("M shape:", M.shape)
assert M.shape == (dim_phi, dim_phi * dim_psi)

B = estimate_L.ols(Phi_X.T, X.T)
assert B.shape == (dim_phi, X.shape[0])

#%% Reshape M into K tensor
K = np.empty((dim_phi, dim_phi, dim_psi))
for i in range(dim_phi):
    K[i] = M[i].reshape((dim_phi, dim_psi), order='F')
K_2 = np.empty((dim_phi, dim_phi, dim_psi))
for i in range(dim_phi):
    K_2[i] = M_2[i].reshape((dim_phi, dim_psi), order='F')

K_rrrs = np.empty((num_ranks, dim_phi, dim_phi, dim_psi))
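To compare the rank-swept fits above, one can look at how the regression residual shrinks as the rank grows. The sketch below does this on synthetic data with a crude low-rank surrogate (an SVD truncation of the ordinary least-squares coefficients) standing in for estimate_L.rrr, which is presumably a true reduced-rank regression; all dimensions and data here are made up.

import numpy as np

rng = np.random.default_rng(3)
n_features, n_targets, N = 12, 6, 300
A = rng.standard_normal((N, n_features))   # plays the role of kronMatrix.T
W_true = rng.standard_normal((n_features, n_targets))
Y = A @ W_true                             # plays the role of Phi_Y.T

W_ols = np.linalg.lstsq(A, Y, rcond=None)[0]
U, s, Vt = np.linalg.svd(W_ols, full_matrices=False)
for rank in range(1, n_targets + 1):
    W_r = (U[:, :rank] * s[:rank]) @ Vt[:rank]      # rank-truncated coefficient matrix
    residual = np.linalg.norm(A @ W_r - Y)
    print(f"rank {rank}: residual {residual:.3e}")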
Example #5
         num_lifted_state_observations))

    for i in range(num_lifted_state_observations):
        kron = np.kron(Psi_U[:, i], Phi_X[:, i])
        psiPhiMatrix[:, i] = kron

    return psiPhiMatrix


psiPhiMatrix = getPsiPhiMatrix(Psi_U, Phi_X)
print("PsiPhiMatrix shape:", psiPhiMatrix.shape)
# || Y         - X B           ||
# || Phi_Y     - M PsiPhi      ||
# || Y.T       - B.T X.T       ||
# || Phi_Y.T   - PsiPhi.T M.T  ||
M = estimate_L.rrr(psiPhiMatrix.T, getPhiMatrix(Y).T).T
print("M shape:", M.shape)
assert M.shape == (num_lifted_state_features,
                   num_lifted_state_features * num_lifted_action_features)

K = np.empty((num_lifted_state_features, num_lifted_state_features,
              num_lifted_action_features))
for i in range(M.shape[0]):
    K[i] = M[i].reshape(
        (num_lifted_state_features, num_lifted_action_features),
        order='F')  # 'F' matches the kron(Psi_U, Phi_X) column ordering
print(K.shape)

K_u_100 = np.einsum('ijz,z->ij', K, psi(U[:, 100]))
assert K_u_100.shape == (num_lifted_state_features, num_lifted_state_features)

K_u_alt = K[:, :, 0]
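Since monomial observables typically start with the constant function, K_u_alt = K[:, :, 0] is the action-independent slice of the tensor, and contracting with psi(u) adds the action-dependent terms on top of it. A small sketch of that decomposition, assuming psi lifts a scalar action to [1, u, u**2] (a hypothetical monomial lift); everything here is synthetic:

import numpy as np

rng = np.random.default_rng(4)
n_phi, n_psi = 4, 3
K_toy = rng.standard_normal((n_phi, n_phi, n_psi))

def psi_toy(u):
    # hypothetical monomial lift of a scalar action
    return np.array([1.0, u, u**2])

u = 0.7
K_u = np.einsum('ijz,z->ij', K_toy, psi_toy(u))
# The constant feature contributes K_toy[:, :, 0]; the rest scales with u and u**2
assert np.allclose(K_u - K_toy[:, :, 0], K_toy[:, :, 1] * u + K_toy[:, :, 2] * u**2)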
Example #6
        (num_lifted_action_features * num_lifted_state_features,
         num_lifted_state_observations))

    for i in range(num_lifted_state_observations):
        kron = np.kron(Psi_U[:, i], Phi_X[:, i])
        psiPhiMatrix[:, i] = kron

    return psiPhiMatrix


#%%
psiPhiMatrix = getPsiPhiMatrix(Psi_U, Phi_X)
print("PsiPhiMatrix shape:", psiPhiMatrix.shape)
M = estimate_L.ols(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T
M_2 = estimate_L.SINDy(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T
M_3 = estimate_L.rrr(psiPhiMatrix.T, getPhiMatrix(Y_opt).T).T
print("M shape:", M.shape)
assert M.shape == (num_lifted_state_features,
                   num_lifted_state_features * num_lifted_action_features)

K = np.empty((num_lifted_state_features, num_lifted_state_features,
              num_lifted_action_features))
for i in range(M.shape[0]):
    K[i] = M[i].reshape(
        (num_lifted_state_features, num_lifted_action_features), order='F')
print("K shape:", K.shape)
K_2 = np.empty((num_lifted_state_features, num_lifted_state_features,
                num_lifted_action_features))
for i in range(M_2.shape[0]):
    K_2[i] = M_2[i].reshape(
        (num_lifted_state_features, num_lifted_action_features), order='F')
Example #7
#     return matrix

# Psi_X = getPsiMatrix(psi, X_train)
# Psi_Y = getPsiMatrix(psi, Y_train)

# Psi_X_0 = getPsiMatrix(psi, X_0_train).T
# Psi_Y_0 = getPsiMatrix(psi, Y_0_train).T
# Psi_X_1 = getPsiMatrix(psi, X_1_train).T
# Psi_Y_1 = getPsiMatrix(psi, Y_1_train).T

#%% Koopman
# || Y         - X B           ||
# || Y.T       - B.T X.T       ||
# || Psi_Y_0   - K Psi_X_0     ||
# || Psi_Y_0.T - Psi_X_0.T K.T ||
K_0 = estimate_L.rrr(Psi_X_0.T, Psi_Y_0.T).T
K_1 = estimate_L.rrr(Psi_X_1.T, Psi_Y_1.T).T
eigenvalues_0, eigenvectors_0 = np.linalg.eig(K_0)
eigenvalues_1, eigenvectors_1 = np.linalg.eig(K_1)
eigenfunction_0 = list(map(lambda psi_x: np.dot(psi_x, eigenvectors_0[:, 0]), Psi_X_0.T))
eigenfunction_1 = list(map(lambda psi_x: np.dot(psi_x, eigenvectors_1[:, 0]), Psi_X_1.T))


plt.plot(eigenvectors_0[:,:3])
plt.plot(eigenvectors_1[:,:3])
plt.title("Eigenvectors of Koopman operator for action 0 and 1")
plt.ylabel('Eigenvector Output')
plt.xlabel('State Snapshots')
plt.show()

plt.plot(eigenfunction_0)
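The per-action operators K_0 and K_1 can be sanity-checked directly: with lifted snapshot pairs, the fit should give Psi_Y_0 ≈ K_0 @ Psi_X_0, and each eigenpair should satisfy K_0 @ v ≈ lambda * v. The sketch below runs that check on synthetic data, with np.linalg.lstsq standing in for estimate_L.rrr (an assumption about that helper).

import numpy as np

rng = np.random.default_rng(5)
k, m = 6, 400
Psi_X_0 = rng.standard_normal((k, m))
K_true = rng.standard_normal((k, k))
Psi_Y_0 = K_true @ Psi_X_0

# || Psi_Y_0.T - Psi_X_0.T K_0.T ||, as in the comment block above
K_0 = np.linalg.lstsq(Psi_X_0.T, Psi_Y_0.T, rcond=None)[0].T
rel_residual = np.linalg.norm(K_0 @ Psi_X_0 - Psi_Y_0) / np.linalg.norm(Psi_Y_0)
print(f"relative residual: {rel_residual:.2e}")

eigenvalues_0, eigenvectors_0 = np.linalg.eig(K_0)
v, lam = eigenvectors_0[:, 0], eigenvalues_0[0]
assert np.allclose(K_0 @ v, lam * v)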
Example #8
def learningAlgorithm(X,
                      psi,
                      Psi_X,
                      action_bounds,
                      reward,
                      timesteps=4,
                      cutoff=8,
                      lamb=10):
    """
    Entropy-regularized value iteration: each pass forms a Boltzmann-style policy
    pi_hat_star(u | x) proportional to exp((reward(x, u) + Lv_hat(x, u)) / lamb) and
    updates the value estimate V(x) by integrating over the action bounds.
    Returns the final value estimates and the policy.
    """
    # _divmax = 20
    Psi_X_T = Psi_X.T

    # placeholder functions
    V = lambda x: x
    pi_hat_star = lambda x: x

    # constants
    n = Psi_X.shape[0]
    d = X.shape[0]
    low, high = action_bounds
    constant = 1 / lamb

    # V^{\pi*_0}
    currentV = np.zeros((1, X.shape[1]))
    lastV = currentV.copy()

    t = 0
    while t < timesteps:
        V_X = currentV.copy()
        B = rrr(Psi_X_T, V_X.T)

        @nb.jit(forceobj=True, fastmath=True)
        def Lv_hat(x, u):
            nablaPsi_x = psi.diff(x.reshape(-1, 1)).reshape((n, d))
            y = np.append(x, x[0]**2).reshape(-1, 1)
            dy_dt = K @ y + D_y * u
            return ((nablaPsi_x.T @ B).T @ F @ dy_dt)[0, 0]

        @nb.jit(forceobj=True, fastmath=True)
        def compute(u, x):
            inner = constant * (reward(x, u) + Lv_hat(x, u))
            return mpexp(inner)

        def pi_hat_star(u, x):  # action given state
            numerator = compute(u, x)
            denominator = qp.quad(compute, low, high, args=(x, ))[0]
            return numerator / denominator

        def compute_2(u, x):
            eval_pi_hat_star = pi_hat_star(u, x)
            return (reward(x, u) - (lamb * ln(eval_pi_hat_star)) +
                    Lv_hat(x, u)) * eval_pi_hat_star

        def V(x):
            return qp.quad(compute_2, low, high, args=(x, ))[0]

        lastV = currentV.copy()  # snapshot so the in-place update below does not alias lastV
        for i in range(currentV.shape[1]):
            x = X[:, i]
            currentV[:, i] = V(x)
            if (i + 1) % 250 == 0:
                print(i + 1)

        t += 1
        print("Completed learning step", t)

    return currentV, pi_hat_star
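Inside the loop, the policy is a Boltzmann-style density over the action interval, normalized by numerical quadrature. The toy sketch below isolates just that construction, using scipy.integrate.quad in place of qp.quad (an assumption about what qp refers to) and hypothetical stand-ins for reward and Lv_hat:

import numpy as np
from scipy.integrate import quad

lamb, low, high = 10.0, -1.0, 1.0
x = np.array([0.5, -0.2])

def reward(x, u):
    return -np.sum(x**2) - 0.1 * u**2    # hypothetical quadratic cost

def Lv_hat(x, u):
    return 0.3 * u                       # hypothetical value-derivative term

def unnormalized(u, x):
    return np.exp((reward(x, u) + Lv_hat(x, u)) / lamb)

Z = quad(unnormalized, low, high, args=(x,))[0]
pi_hat_star = lambda u: unnormalized(u, x) / Z

# The normalized density integrates to one over the action bounds
assert abs(quad(pi_hat_star, low, high)[0] - 1.0) < 1e-6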