def lse(self, L):
        """
        Performs Least Squares Estimation with input data provided and returns 
        the least squares estimate        
        """
        # Unpack the required dynamics data
        Q = self.Q
        R = self.R
        n = self.n
        m = self.m
        ell = self.ell
        kronA = self.kronA
        kronB = self.kronB

        # Perform least-squares estimation
        Q_est = np.copy(Q)
        R_est = np.copy(R)
        if self.unknown_params.Q and self.unknown_params.R:
            S = np.hstack([kronA, kronB])
            vecTheta = mdot(la.pinv(S), vec(L))
            vecQ_est = vecTheta[0:ell**2]
            vecR_est = vecTheta[ell**2:]
            Q_est = np.reshape(vecQ_est, [n, n])
            R_est = np.reshape(vecR_est, [m, m])
        elif not self.unknown_params.Q and self.unknown_params.R:
            S = np.copy(kronB)
            vecCW = mdot(kronA, vec(Q))
            vecR_est = mdot(la.pinv(S), vec(L) - vecCW)
            R_est = np.reshape(vecR_est, [m, m])
        elif self.unknown_params.Q and not self.unknown_params.R:
            S = np.copy(kronA)
            vecCV = mdot(kronB, vec(R))
            vecQ_est = mdot(la.pinv(S), vec(L) - vecCV)
            Q_est = np.reshape(vecQ_est, [n, n])
        return Q_est, R_est
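All three branches instantiate the same vectorized identity vec(L) = kronA vec(Q) + kronB vec(R). A standalone sketch of the known-Q / unknown-R branch, with NumPy stand-ins for the class attributes (hypothetical dimensions, noiseless L):

import numpy as np
import numpy.linalg as la
import numpy.random as npr

npr.seed(0)
n, m = 3, 2                        # here ell == n, matching the reshapes above
Ar, Br = npr.randn(n, n), npr.randn(n, m)
kronA = np.kron(Ar, Ar)            # stand-in for the class attribute kronA
kronB = np.kron(Br, Br)            # stand-in for the class attribute kronB

Q = np.eye(n)                      # known process noise covariance
R_true = np.diag([2.0, 0.5])       # unknown measurement noise covariance
vecL = kronA @ Q.flatten() + kronB @ R_true.flatten()

# The known-Q / unknown-R branch: vec(R) = pinv(kronB) (vec(L) - kronA vec(Q))
vecR_est = la.pinv(kronB) @ (vecL - kronA @ Q.flatten())
assert np.allclose(vecR_est.reshape(m, m), R_true)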
Example #2
def dare_obj(obj):
    if isinstance(obj,LQRSys) and not isinstance(obj,LQRSysMult):
        Pare = dare(obj.A,obj.B,obj.Q,obj.R)
        Kare = -la.solve((obj.R+mdot(obj.B.T,Pare,obj.B)),
                         mdot(obj.B.T,Pare,obj.A))
        return Pare,Kare
    elif isinstance(obj,LQRSys) and isinstance(obj,LQRSysMult):
        return dare_mult(obj.A,obj.B,obj.a,obj.Aa,obj.b,obj.Bb,obj.Q,obj.R)
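Here dare appears to wrap a standard discrete-time algebraic Riccati solver. A self-contained sketch of the same gain computation using scipy.linalg.solve_discrete_are (hypothetical system; sign convention u = Kx, matching the solve above):

import numpy as np
import numpy.linalg as la
from scipy.linalg import solve_discrete_are

A = np.array([[1.0, 1.0], [0.0, 1.0]])   # double integrator
B = np.array([[0.0], [1.0]])
Q = np.eye(2)
R = np.eye(1)

Pare = solve_discrete_are(A, B, Q, R)
Kare = -la.solve(R + B.T @ Pare @ B, B.T @ Pare @ A)
assert np.max(np.abs(la.eigvals(A + B @ Kare))) < 1   # closed loop is stable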
Example #4
def dlyap_obj(obj,matrixtype='P',algo='iterative',show_warn=False,check_pd=False,P00=None,S00=None):
    # obj is a LQRSys or LQRSysMult instance
    if isinstance(obj,LQRSys) and not isinstance(obj,LQRSysMult):
        if matrixtype=='P':
            AA = obj.AK.T
            QQ = obj.Q + sympart(mdot(obj.K.T,obj.R,obj.K))
            P = dlyap(AA,QQ)
            if check_pd:
                if not is_pos_def(P):
                    P = np.full_like(P,np.inf)
        elif matrixtype=='S':
            AA = obj.AK
            QQ = obj.S0
            S = dlyap(AA,QQ)
            if check_pd:
                if not is_pos_def(S):
                    S = np.full_like(S,np.inf)
        elif matrixtype=='PS':
            P = dlyap_obj(obj,'P',algo,show_warn,check_pd,P00,S00)
            S = dlyap_obj(obj,'S',algo,show_warn,check_pd,P00,S00)
        if matrixtype=='P':
            return P
        elif matrixtype=='S':
            return S
        elif matrixtype=='PS':
            return P,S
    elif isinstance(obj,LQRSys) and isinstance(obj,LQRSysMult):
        return dlyap_mult(obj.A,obj.B,obj.K,obj.a,obj.Aa,obj.b,
                          obj.Bb,obj.Q,obj.R,obj.S0,matrixtype=matrixtype,
                          algo=algo,show_warn=show_warn,check_pd=check_pd,
                          P00=P00,S00=S00)
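The 'P' branch computes the standard closed-loop cost matrix, i.e. the solution of P = AK' P AK + Q + K'RK, and trace(P @ S0) is then the LQR cost for an initial-state covariance S0. A standalone sketch, assuming dlyap follows the scipy convention X = A X A' + Q (hence the transpose passed above):

import numpy as np
from scipy.linalg import solve_discrete_lyapunov

AK = np.array([[0.9, 0.2], [0.0, 0.8]])   # hypothetical stable closed-loop matrix
QK = np.eye(2)                            # stands in for Q + K'RK
S0 = np.eye(2)

P = solve_discrete_lyapunov(AK.T, QK)     # solves P = AK' P AK + QK
assert np.allclose(P, AK.T @ P @ AK + QK)
cost = np.trace(P @ S0)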
Example #5
def algo1(A, B, Aa, Q, R, theta):
    p = len(theta)  # number of uncertainty directions; used by func2 below
    def calc_control(z):
        a = theta*z
        return gare(A, B, a, Aa, Q, R)

    def func(y):
        return calc_control(y)[0]

    z = bisection(y_lwr_0=0, y_upr_0=1, objective=func)
    a = theta*z
    P, K = calc_control(z)

    ABK = A + mdot(B, K)

    def func2(y):
        eta = theta*y
        LHS = Q + mdot(K.T, R, K)
        for i in range(p):
            LHS += a[i]*mdot(Aa[i].T, P, Aa[i])
        RHS = np.zeros_like(LHS)
        for i in range(p):
            RHS += eta[i]*positive_definite_part(mdot(Aa[i].T, P, ABK) + mdot(ABK.T, P, Aa[i]))
            for j in range(p):
                RHS += eta[i]*eta[j]*positive_definite_part(mdot(Aa[i].T, P, Aa[j]) + mdot(Aa[j].T, P, Aa[i]))
        if is_pos_def(LHS - RHS):
            return True
        else:
            return None

    y = bisection(y_lwr_0=0, y_upr_0=1, objective=func2)
    eta = theta*y

    return K, eta
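bisection is not defined in this listing. From its use here, objective(y) returns a non-None value when y is feasible and None otherwise, the feasible set is an interval containing y_lwr_0, and bisection returns (approximately) the largest feasible y. A minimal sketch under those assumptions:

def bisection(y_lwr_0, y_upr_0, objective, tol=1e-6, max_iters=100):
    """Approximate the largest y in [y_lwr_0, y_upr_0] with objective(y) not None,
    assuming feasibility is monotone: feasible below some threshold, infeasible above."""
    y_lwr, y_upr = y_lwr_0, y_upr_0
    if objective(y_upr) is not None:
        return y_upr                   # the whole interval is feasible
    for _ in range(max_iters):
        y_mid = 0.5 * (y_lwr + y_upr)
        if objective(y_mid) is not None:
            y_lwr = y_mid              # feasible: raise the lower bound
        else:
            y_upr = y_mid              # infeasible: lower the upper bound
        if y_upr - y_lwr < tol:
            break
    return y_lwr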
Example #6
def lqr_gradient(obj):
    """Calculate the cost (policy) gradient of an LQR system"""
    # obj is a LQRSys or LQRSysMult instance

    if obj.P is None:
        raise Exception('Attempted to compute gradient of system with undefined cost P matrix!')

    if isinstance(obj, LQRSys) and not isinstance(obj, LQRSysMult):
        RK = obj.R + mdot(obj.B.T,obj.P,obj.B)
    elif isinstance(obj, LQRSys) and isinstance(obj, LQRSysMult):
        # Uncertain part
        BPBunc = np.zeros([obj.m,obj.m])
        for j in range(obj.q):
            BPBunc += obj.b[j]*mdot(obj.Bb[:,:,j].T, obj.P, obj.Bb[:,:,j])
        RK = obj.R + mdot(obj.B.T, obj.P, obj.B) + BPBunc
    EK = np.dot(RK,obj.K) + mdot(obj.B.T,obj.P,obj.A)
    # Compute gradient
    grad = 2*np.dot(EK,obj.S)
    return grad, RK, EK
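In the non-multiplicative case this is the classical LQR policy gradient, grad = 2 EK S with EK = (R + B'PB)K + B'PA and S the closed-loop state covariance. A self-contained finite-difference check of that formula (scipy solvers standing in for the module's dlyap machinery):

import numpy as np
from scipy.linalg import solve_discrete_lyapunov

A = np.array([[0.9, 0.1], [0.0, 0.8]])
B = np.array([[0.0], [1.0]])
Q, R, S0 = np.eye(2), np.eye(1), np.eye(2)

def cost_and_grad(K):
    AK = A + B @ K
    P = solve_discrete_lyapunov(AK.T, Q + K.T @ R @ K)   # cost matrix
    S = solve_discrete_lyapunov(AK, S0)                  # state covariance
    EK = (R + B.T @ P @ B) @ K + B.T @ P @ A
    return np.trace(P @ S0), 2 * EK @ S

K = np.array([[-0.1, -0.2]])      # a stabilizing gain for this system
c0, grad = cost_and_grad(K)
eps = 1e-6
grad_fd = np.zeros_like(K)
for i in range(K.shape[0]):
    for j in range(K.shape[1]):
        Kp = K.copy()
        Kp[i, j] += eps
        grad_fd[i, j] = (cost_and_grad(Kp)[0] - c0) / eps
assert np.allclose(grad, grad_fd, atol=1e-3)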
Example #7
def check_mss_obj(obj):
    """Check whether or not system is closed-loop mean-square stable"""
    # Options
    max_iters = 1000
    epsilon = 1e-6
    # Initialize
    iterc = 0
    stop_early = False
    converged = False
    stop = False
    x0 = np.ones([obj.n, 1])
    S00 = np.dot(x0, x0.T)
    S00n = la.norm(S00, 'fro')
    S = S00
    #    norm_hist = np.zeros(max_iters+1)
    while not stop:
        # Record previous iterate
        S_prev = S
        #        norm_hist[iterc] = la.norm(S)
        # Recurse
        if isinstance(obj, LQRSys) and not isinstance(obj, LQRSysMult):
            S = mdot(obj.AK, S, obj.AK.T)
        elif isinstance(obj, LQRSys) and isinstance(obj, LQRSysMult):
            # Intermediate expressions
            ASAunc = np.zeros([obj.n, obj.n])
            for i in range(obj.p):
                ASAunc += obj.a[i] * mdot(obj.Aa[:, :, i], S, obj.Aa[:, :,
                                                                     i].T)
            BSBunc = np.zeros([obj.n, obj.n])
            for j in range(obj.q):
                BSBunc += obj.b[j] * mdot(obj.Bb[:, :, j], obj.K, S, obj.K.T,
                                          obj.Bb[:, :, j].T)
            S = mdot(obj.AK, S, obj.AK.T) + ASAunc + BSBunc
        # Check for stopping condition
        if la.norm(S - S_prev, 'fro') / S00n < epsilon:
            converged = True
        if iterc >= max_iters:
            stop_early = True
        else:
            iterc += 1
        stop = converged or stop_early
    #    norm_hist = norm_hist[0:iterc]
    return converged
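A non-iterative equivalent: the recursion above is linear in S, so the closed loop is mean-square stable iff the matrix of the operator S -> AK S AK' + sum_i a_i Aa_i S Aa_i' (plus the B-noise terms) has spectral radius below 1. A standalone sketch for the state-multiplicative case:

import numpy as np
import numpy.linalg as la

AK = np.array([[0.9, 0.1], [0.0, 0.8]])     # closed-loop mean dynamics
a = [0.05]                                  # multiplicative noise variances
Aa = [np.array([[1.0, 0.0], [0.0, 0.0]])]   # noise directions

# vec(AK S AK') = kron(AK, AK) vec(S), so stack the operator as a matrix
T = np.kron(AK, AK) + sum(ai * np.kron(Ai, Ai) for ai, Ai in zip(a, Aa))
ms_stable = np.max(np.abs(la.eigvals(T))) < 1
print(ms_stable)   # True for this data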
Example #8
def gare(A, B, a, Aa, Q, R):
    # Options
    max_iters = 1000
    epsilon = 1e-6
    Pelmax = 1e40
    n = A.shape[1]
    p = len(a)
    # Initialize
    P = Q
    iterc = 0
    stop_early = False
    converged = False
    stop = False

    while not stop:
        # Record previous iterate
        P_prev = P
        # Certain part
        APAcer = mdot(A.T, P, A)
        BPBcer = mdot(B.T, P, B)
        # Uncertain part
        APAunc = np.zeros([n, n])
        for i in range(p):
            APAunc += a[i]*mdot(Aa[i].T, P, Aa[i])
        APAsum = APAcer + APAunc
        BPBsum = np.copy(BPBcer)
        # Recurse
        P = Q + APAsum - mdot(A.T, P, B, la.solve(R + BPBsum, B.T), P, A)
        # Check for stopping condition
        if la.norm(P - P_prev, 'fro')/la.norm(P, 'fro') < epsilon:
            converged = True
        if iterc >= max_iters or np.any(np.abs(P) > Pelmax):
            stop_early = True
        else:
            iterc += 1
        stop = converged or stop_early
    # Compute the gains
    if stop_early:
        P, K = None, None
    else:
        K = -mdot(la.solve(R + BPBsum, B.T), P, A)
    return P, K
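Sanity check: with the uncertainty weights zeroed, the recursion above reduces to standard Riccati value iteration, so the fixed point should agree with scipy's DARE solver (gare as defined above is assumed in scope):

import numpy as np
from scipy.linalg import solve_discrete_are

A = np.array([[1.0, 1.0], [0.0, 1.0]])
B = np.array([[0.0], [1.0]])
Q, R = np.eye(2), np.eye(1)

P_gare, K_gare = gare(A, B, a=np.zeros(1), Aa=np.zeros([1, 2, 2]), Q=Q, R=R)
P_dare = solve_discrete_are(A, B, Q, R)
assert np.allclose(P_gare, P_dare, atol=1e-3)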
def model_check(A, B, Q, R, K, op=True, model_type=None):
    """Check stability of system (A,B) with costs Q, R under the feedback gain K """
    # A, B, C = sample_ABCrand(problem_data_true)
    Qbar = Q + mdot(K.T, R, K)
    Abar = A + mdot(B, K)
    # print("Feedback gains test ")
    if model_type is not None:
        print(model_type)
    if is_pos_def(dlyap(Abar, Qbar)):  # positive definite => gain stabilizes
        if op:
            print(
                "Policy Iteration on model stabilizes the system - D-Lyap is Positive Definite.\n"
            )
        ret = True
    else:
        if op:
            print(
                "Policy Iteration on model does not stabilize the system - D-Lyap is NOT Positive Definite.\n"
            )
        ret = False
    return ret
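Example usage on a diagonal system with one unstable mode, assuming the helpers dlyap, mdot, and is_pos_def are in scope (and that dlyap returns the indefinite solution for an unstable Abar rather than raising):

import numpy as np

A = np.array([[1.1, 0.0], [0.0, 0.9]])   # first mode is open-loop unstable
B = np.eye(2)
Q, R = np.eye(2), np.eye(2)

K_good = -0.5 * A                        # A + B K_good = 0.5 A, stable
K_bad = np.zeros([2, 2])                 # leaves the unstable mode untouched

assert model_check(A, B, Q, R, K_good, op=False)
assert not model_check(A, B, Q, R, K_bad, op=False)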
Example #10
def dare_mult(A, B, a, Aa, b, Bb, Q, R, algo='iterative', show_warn=False):
    if algo == 'iterative':
        # Options
        max_iters = 1000
        epsilon = 1e-6
        Pelmax = 1e20
        n = A.shape[1]
        m = B.shape[1]
        p = len(a)
        q = len(b)
        # Initialize
        P = Q
        iterc = 0
        stop_early = False
        converged = False
        stop = False

        while not stop:
            # Record previous iterate
            P_prev = P
            # Certain part
            APAcer = mdot(A.T, P, A)
            BPBcer = mdot(B.T, P, B)
            # Uncertain part
            APAunc = np.zeros([n, n])
            for i in range(p):
                APAunc += a[i] * mdot(Aa[:, :, i].T, P, Aa[:, :, i])
            BPBunc = np.zeros([m, m])
            for j in range(q):
                BPBunc += b[j] * mdot(Bb[:, :, j].T, P, Bb[:, :, j])
            APAsum = APAcer + APAunc
            BPBsum = BPBcer + BPBunc
            # Recurse
            P = Q + APAsum - mdot(A.T, P, B, la.solve(R + BPBsum, B.T), P, A)
            # Check for stopping condition
            if la.norm(P - P_prev, 'fro') / la.norm(P, 'fro') < epsilon:
                converged = True
            if iterc >= max_iters or np.any(np.abs(P) > Pelmax):
                stop_early = True
            else:
                iterc += 1
            stop = converged or stop_early
        # Compute the gains
        if stop_early:
            if show_warn:
                warnings.simplefilter('always', UserWarning)
                warn(
                    "Recursion failed, ensure system is mean square stabilizable "
                    "or increase maximum iterations")
            P = None
            K = None
        else:
            K = -mdot(la.solve(R + BPBsum, B.T), P, A)
    else:
        raise ValueError("Unrecognized algo: '%s'" % algo)
    return P, K
    def obsvp(self, p):
        """
        Returns a p-step observability matrix
        Input Parameters:
        p: Dimension of the required observability matrix
        """
        F = self.F
        H = self.H
        n = self.n
        m = self.m

        # Build the observability matrix
        O = np.zeros([m * p, n])
        O[0:m] = np.copy(H)
        for k in range(1, p):
            O[m * k:m * (k + 1)] = mdot(O[m * (k - 1):m * k], F)
        return O
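The loop builds the standard p-step observability matrix O_p = [H; HF; ...; HF^(p-1)]. An equivalent standalone construction for checking it:

import numpy as np
import numpy.linalg as la

F = np.array([[1.0, 0.1], [0.0, 1.0]])
H = np.array([[1.0, 0.0]])               # position-only measurement
p = 2

O = np.vstack([H @ la.matrix_power(F, k) for k in range(p)])
assert la.matrix_rank(O) == F.shape[0]   # (F, H) is observable with p = 2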
def model_based_robust_stabilization_experiment():
    seed = 1
    npr.seed(seed)

    problem_data_true, problem_data = gen_double_spring_mass()

    problem_data_keys = [
        'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi', 'varBj', 'varCk', 'Q', 'R',
        'S'
    ]
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [
        problem_data[key] for key in problem_data_keys
    ]

    n, m, p = [M.shape[1] for M in [A, B, C]]
    q, r, s = [M.shape[0] for M in [Ai, Bj, Ck]]

    # Synthesize controllers using various uncertainty modeling terms

    # Modify problem data
    # LQR w/ game adversary
    # Setting varAi, varBj, varCk = 0 => no multiplicative noise on the game
    problem_data_model_n = copy.deepcopy(problem_data)
    problem_data_model_n['varAi'] *= 0
    problem_data_model_n['varBj'] *= 0
    problem_data_model_n['varCk'] *= 0

    # LQR w/ multiplicative noise
    # Setting C = 0 and varCk = 0 => no game adversary
    problem_data_model_m = copy.deepcopy(problem_data)
    problem_data_model_m['C'] *= 0
    problem_data_model_m['varCk'] *= 0

    # Simulation options
    sim_options = None
    num_iterations = 50
    problem_data_known = True

    # Policy iteration on LQR w/ game adversary and multiplicative noise
    K0, L0 = get_initial_gains(problem_data, initial_gain_method='dare')
    print("LQR w/ game adversary and multiplicative noise")
    P_pi, K_pi, L_pi, H_pi, P_history_pi, K_history_pi, L_history_pi, c_history_pi, H_history_pi = policy_iteration(
        problem_data, problem_data_known, K0, L0, sim_options, num_iterations)
    verify_gare(problem_data,
                P_pi,
                algo_str='Policy iteration - Game w/ Multiplicative noise')

    # Check concavity condition
    Qvv_pi = -S + mdot(C.T, P_pi, C) + np.sum(
        [varCk[k] * mdot(Ck[k].T, P_pi, Ck[k]) for k in range(s)], axis=0)
    if not is_pos_def(-Qvv_pi):
        raise Exception(
            'Problem fails the concavity condition, adjust adversary strength')

    # Check positive definiteness condition
    QKL_pi = Q + mdot(K_pi.T, R, K_pi) - mdot(L_pi.T, S, L_pi)
    if not is_pos_def(QKL_pi):
        raise Exception(
            'Problem fails the positive definiteness condition, adjust adversary strength'
        )
    print(QKL_pi)

    # Policy Iteration on LQR w/ game adversary
    K0n, L0n = get_initial_gains(problem_data_model_n,
                                 initial_gain_method='dare')
    print("LQR w/ game adversary")
    Pn_pi, Kn_pi, Ln_pi, Hn_pi, Pn_history_pi, Kn_history_pi, Ln_history_pi, cn_history_pi, Hn_history_pi = policy_iteration(
        problem_data_model_n, problem_data_known, K0n, L0n, sim_options,
        num_iterations)
    verify_gare(problem_data_model_n,
                Pn_pi,
                algo_str='Policy iteration - Game w/o Multiplicative noise')

    # Policy Iteration on LQR w/ multiplicative noise
    K0m, L0m = get_initial_gains(problem_data_model_m,
                                 initial_gain_method='dare')
    print("LQR w/ multiplicative noise")
    Pm_pi, Km_pi, Lm_pi, Hm_pi, Pm_history_pi, Km_history_pi, Lm_history_pi, cm_history_pi, Hm_history_pi = policy_iteration(
        problem_data_model_m, problem_data_known, K0m, L0m, sim_options,
        num_iterations)
    verify_gare(problem_data_model_m,
                Pm_pi,
                algo_str='Policy iteration - LQR w/ Multiplicative noise')

    # LQR on true system
    A_true, B_true, Q_true, R_true = [
        problem_data_true[key] for key in ['A', 'B', 'Q', 'R']
    ]
    n_true, m_true = [M.shape[1] for M in [A_true, B_true]]
    Pare_true, Kare_true = dare_gain(A_true, B_true, Q_true, R_true)

    # LQR on nominal system, no explicit robust control design
    Pce, Kce = dare_gain(A, B, Q, R)

    # Check if synthesized controllers stabilize the true system
    K_pi_true = np.hstack([K_pi, np.zeros([m, n])])
    Kn_pi_true = np.hstack([Kn_pi, np.zeros([m, n])])
    Km_pi_true = np.hstack([Km_pi, np.zeros([m, n])])
    Kce_true = np.hstack([Kce, np.zeros([m, n])])
    Kol_true = np.zeros_like(Kce_true)

    control_method_strings = [
        'open-loop     ', 'cert equiv    ', 'noise         ', 'game          ',
        'noise + game  ', 'optimal       '
    ]
    K_list = [Kol_true, Kce_true, Km_pi_true, Kn_pi_true, K_pi_true, Kare_true]
    AK_list = [A_true + np.dot(B_true, K) for K in K_list]
    QK_list = [Q_true + mdot(K.T, R_true, K) for K in K_list]
    specrad_list = [specrad(AK) for AK in AK_list]
    cost_list = [
        np.trace(dlyap(AK.T, QK)) if sr < 1 else np.inf
        for AK, QK, sr in zip(AK_list, QK_list, specrad_list)
    ]

    set_numpy_decimal_places(1)

    output_text_filename = 'results_model_based_robust_stabilization_experiment.txt'
    output_text_path = os.path.join('..', 'results', output_text_filename)
    with open(output_text_path, 'w') as f:
        header_str = 'method       |  specrad  |   cost   |  gains'
        print(header_str, file=f)
        for control_method_string, sr, cost, K in zip(control_method_strings,
                                                      specrad_list, cost_list,
                                                      K_list):
            line_str = '%s  %.3f %8s      %s' % (control_method_string, sr,
                                                 '%10.0f' % cost, K)
            print(line_str, file=f)
def rollout(problem_data, K, L, sim_options):
    """Simulate closed-loop state response"""
    problem_data_keys = [
        'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi', 'varBj', 'varCk', 'Q', 'R',
        'S'
    ]
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [
        problem_data[key] for key in problem_data_keys
    ]
    n, m, p = [M.shape[1] for M in [A, B, C]]

    sim_options_keys = [
        'xstd', 'ustd', 'vstd', 'wstd', 'nt', 'nr', 'group_option'
    ]
    xstd, ustd, vstd, wstd, nt, nr, group_option = [
        sim_options[key] for key in sim_options_keys
    ]

    qfun_estimator = sim_options['qfun_estimator']

    if qfun_estimator == 'direct':
        # First step
        # Sample initial states, defender control inputs, and attacker control inputs
        x0, u0, v0 = [
            npr.randn(nr, dim) * std
            for dim, std in zip([n, m, p], [xstd, ustd, vstd])
        ]

        Qval = np.zeros(nr)

        if group_option == 'single':
            # Iterate over rollouts
            for k in range(nr):
                x0_k, u0_k, v0_k = [var[k] for var in [x0, u0, v0]]

                # Initialize
                x = np.copy(x0_k)

                # Iterate over timesteps
                for i in range(nt):
                    # Compute controls
                    if i == 0:
                        u, v = np.copy(u0_k), np.copy(v0_k)
                    else:
                        u, v = np.dot(K, x), np.dot(L, x)

                    # Accumulate cost
                    Qval[k] += mdot(x.T, Q, x) + mdot(u.T, R, u) - mdot(
                        v.T, S, v)

                    # Randomly sample state transition matrices using multiplicative noise
                    Arand, Brand, Crand = sample_ABCrand(problem_data)

                    # Additive noise
                    w = npr.randn(n) * wstd

                    # Transition the state using multiplicative and additive noise
                    x = np.dot(Arand, x) + np.dot(Brand, u) + np.dot(Crand,
                                                                     v) + w

        elif group_option == 'group':
            # Randomly sample state transition matrices using multiplicative noise
            Arand_all, Brand_all, Crand_all = sample_ABCrand_multi(
                problem_data, nt, nr)

            # Randomly sample additive noise
            w_all = npr.randn(nt, nr, n) * wstd

            # Initialize
            x = np.copy(x0)

            # Iterate over timesteps
            for i in range(nt):
                # Compute controls
                if i == 0:
                    u = np.copy(u0)
                    v = np.copy(v0)
                else:
                    u = groupdot(K, x)
                    v = groupdot(L, x)

                # Accumulate cost
                Qval += groupquadform(Q, x) + groupquadform(
                    R, u) - groupquadform(S, v)

                # Look up stochastic dynamics and additive noise
                Arand, Brand, Crand = Arand_all[i], Brand_all[i], Crand_all[i]
                w = w_all[i]

                # Transition the state using multiplicative and additive noise
                x = groupdot(Arand, x) + groupdot(Brand, u) + groupdot(
                    Crand, v) + w

        return x0, u0, v0, Qval

    elif qfun_estimator == 'lsadp' or qfun_estimator == 'lstdq':
        # Sample initial states, defender control inputs, and attacker control inputs
        x0 = xstd * npr.randn(nr, n)
        u_explore_hist = ustd * npr.randn(nr, nt, m)
        v_explore_hist = vstd * npr.randn(nr, nt, p)

        x_hist = np.zeros([nr, nt, n])
        u_hist = np.zeros([nr, nt, m])
        v_hist = np.zeros([nr, nt, p])
        c_hist = np.zeros([nr, nt])

        if group_option == 'single':
            # Iterate over rollouts
            for k in range(nr):
                # Initialize
                x = np.copy(x0[k])

                # Iterate over timesteps
                for i in range(nt):
                    # Compute controls
                    u = np.dot(K, x) + u_explore_hist[k, i]
                    v = np.dot(L, x) + v_explore_hist[k, i]

                    # Compute cost
                    c = mdot(x.T, Q, x) + mdot(u.T, R, u) - mdot(v.T, S, v)

                    # Record history
                    x_hist[k, i] = x
                    u_hist[k, i] = u
                    v_hist[k, i] = v
                    c_hist[k, i] = c

                    # Randomly sample state transition matrices using multiplicative noise
                    Arand, Brand, Crand = sample_ABCrand(problem_data)

                    # Additive noise
                    w = npr.randn(n) * wstd

                    # Transition the state using multiplicative and additive noise
                    x = np.dot(Arand, x) + np.dot(Brand, u) + np.dot(Crand,
                                                                     v) + w

        elif group_option == 'group':
            # Randomly sample state transition matrices using multiplicative noise
            Arand_all, Brand_all, Crand_all = sample_ABCrand_multi(
                problem_data, nt, nr)

            # Randomly sample additive noise
            w_all = npr.randn(nt, nr, n) * wstd

            # Initialize
            x = np.copy(x0)

            # Iterate over timesteps
            for i in range(nt):
                # Compute controls
                u = groupdot(K, x) + u_explore_hist[:, i]
                v = groupdot(L, x) + v_explore_hist[:, i]

                # Compute cost
                c = groupquadform(Q, x) + groupquadform(R, u) - groupquadform(
                    S, v)

                # Record history
                x_hist[:, i] = x
                u_hist[:, i] = u
                v_hist[:, i] = v
                c_hist[:, i] = c

                # Look up stochastic dynamics and additive noise
                Arand, Brand, Crand = Arand_all[i], Brand_all[i], Crand_all[i]
                w = w_all[i]

                # Transition the state using multiplicative and additive noise
                x = groupdot(Arand, x) + groupdot(Brand, u) + groupdot(
                    Crand, v) + w

        return x_hist, u_hist, v_hist, c_hist
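groupdot and groupquadform are not defined in this listing; from their use above, they broadcast a single matrix against a batch of row vectors. A sketch consistent with that usage:

import numpy as np

def groupdot(A, x):
    """Apply A to each row vector in the batch x: returns (A @ x[k]) for all k."""
    return np.einsum('ij,kj->ki', A, x)

def groupquadform(A, x):
    """Quadratic form x[k]' A x[k] for each row vector in the batch x."""
    return np.einsum('ki,ij,kj->k', x, A, x)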
def qfun(problem_data,
         problem_data_known=None,
         P=None,
         K=None,
         L=None,
         sim_options=None,
         output_format=None):
    """Compute or estimate Q-function matrix"""
    if problem_data_known is None:
        problem_data_known = True
    if output_format is None:
        output_format = 'list'
    problem_data_keys = [
        'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi', 'varBj', 'varCk', 'Q', 'R',
        'S'
    ]
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [
        problem_data[key] for key in problem_data_keys
    ]
    n, m, p = [M.shape[1] for M in [A, B, C]]
    q, r, s = [M.shape[0] for M in [Ai, Bj, Ck]]

    if P is None:
        P = gdlyap(problem_data, K, L)

    if problem_data_known:
        APAi = np.sum([varAi[i] * mdot(Ai[i].T, P, Ai[i]) for i in range(q)],
                      axis=0)
        BPBj = np.sum([varBj[j] * mdot(Bj[j].T, P, Bj[j]) for j in range(r)],
                      axis=0)
        CPCk = np.sum([varCk[k] * mdot(Ck[k].T, P, Ck[k]) for k in range(s)],
                      axis=0)

        Qxx = Q + mdot(A.T, P, A) + APAi
        Quu = R + mdot(B.T, P, B) + BPBj
        Qvv = -S + mdot(C.T, P, C) + CPCk
        Qux = mdot(B.T, P, A)
        Qvx = mdot(C.T, P, A)
        Qvu = mdot(C.T, P, B)
    else:
        nr = sim_options['nr']
        nt = sim_options['nt']
        qfun_estimator = sim_options['qfun_estimator']

        if qfun_estimator == 'direct':
            # Simulation data collection
            x0, u0, v0, Qval = rollout(problem_data, K, L, sim_options)

            # Dimensions
            Qpart_shapes = [[n, n], [m, m], [p, p], [m, n], [p, n], [p, m]]
            Qvec_part_lengths = [np.prod(shape) for shape in Qpart_shapes]

            # Least squares estimation
            xuv_data = np.zeros([nr, np.sum(Qvec_part_lengths)])
            for i in range(nr):
                x = x0[i]
                u = u0[i]
                v = v0[i]
                xuv_data[i] = np.hstack([
                    kron(x.T, x.T),
                    kron(u.T, u.T),
                    kron(v.T, v.T), 2 * kron(x.T, u.T), 2 * kron(x.T, v.T),
                    2 * kron(u.T, v.T)
                ])

            # Solve the least squares problem
            Qvec = la.lstsq(xuv_data, Qval, rcond=None)[0]

            # Split and reshape the solution vector into the appropriate matrices
            idxi = [0]
            Qvec_parts = []
            Q_parts = []
            for i, part_length in enumerate(Qvec_part_lengths):
                idxi.append(idxi[i] + part_length)
                Qvec_parts.append(Qvec[idxi[i]:idxi[i + 1]])
                Q_parts.append(np.reshape(Qvec_parts[i], Qpart_shapes[i]))

            Qxx, Quu, Qvv, Qux, Qvx, Qvu = Q_parts

        elif qfun_estimator == 'lsadp':
            # Simulation data collection
            x_hist, u_hist, v_hist, c_hist = rollout(problem_data, K, L,
                                                     sim_options)

            # Form the data matrices
            ns = nr * (nt - 1)
            nz = int(((n + m + p + 1) * (n + m + p)) / 2)
            mu_hist = np.zeros([nr, nt, nz])
            nu_hist = np.zeros([nr, nt, nz])

            def phi(x):
                return svec2(np.outer(x, x))

            for i in range(nr):
                for j in range(nt):
                    z = np.concatenate(
                        [x_hist[i, j], u_hist[i, j], v_hist[i, j]])
                    w = np.concatenate([
                        x_hist[i, j],
                        np.dot(K, x_hist[i, j]),
                        np.dot(L, x_hist[i, j])
                    ])
                    mu_hist[i, j] = phi(z)
                    nu_hist[i, j] = phi(w)
            Y = np.zeros(ns)
            Z = np.zeros([ns, nz])
            for i in range(nr):
                lwr = i * (nt - 1)
                upr = (i + 1) * (nt - 1)
                Y[lwr:upr] = c_hist[i, 0:-1]
                Z[lwr:upr] = mu_hist[i, 0:-1] - nu_hist[i, 1:]

            # Solve the least squares problem
            # H_svec = la.lstsq(Z, Y, rcond=None)[0]
            # H = smat(H_svec)
            H_svec2 = la.lstsq(Z, Y, rcond=None)[0]
            H = smat2(H_svec2)

            Qxx = H[0:n, 0:n]
            Quu = H[n:n + m, n:n + m]
            Qvv = H[n + m:, n + m:]
            Qux = H[n:n + m, 0:n]
            Qvx = H[n + m:, 0:n]
            Qvu = H[n + m:, n:n + m]

        elif qfun_estimator == 'lstdq':
            # Simulation data collection
            x_hist, u_hist, v_hist, c_hist = rollout(problem_data, K, L,
                                                     sim_options)

            # Form the data matrices
            nz = int(((n + m + p + 1) * (n + m + p)) / 2)
            mu_hist = np.zeros([nr, nt, nz])
            nu_hist = np.zeros([nr, nt, nz])

            def phi(x):
                return svec2(np.outer(x, x))

            for i in range(nr):
                for j in range(nt):
                    z = np.concatenate(
                        [x_hist[i, j], u_hist[i, j], v_hist[i, j]])
                    w = np.concatenate([
                        x_hist[i, j],
                        np.dot(K, x_hist[i, j]),
                        np.dot(L, x_hist[i, j])
                    ])
                    mu_hist[i, j] = phi(z)
                    nu_hist[i, j] = phi(w)

            Y = np.zeros(nr * nz)
            Z = np.zeros([nr * nz, nz])
            for i in range(nr):
                lwr = i * nz
                upr = (i + 1) * nz
                for j in range(nt - 1):
                    Y[lwr:upr] += mu_hist[i, j] * c_hist[i, j]
                    Z[lwr:upr] += np.outer(mu_hist[i, j],
                                           mu_hist[i, j] - nu_hist[i, j + 1])

            H_svec2 = la.lstsq(Z, Y, rcond=None)[0]
            H = smat2(H_svec2)

            Qxx = H[0:n, 0:n]
            Quu = H[n:n + m, n:n + m]
            Qvv = H[n + m:, n + m:]
            Qux = H[n:n + m, 0:n]
            Qvx = H[n + m:, 0:n]
            Qvu = H[n + m:, n:n + m]

    if output_format == 'list':
        outputs = Qxx, Quu, Qvv, Qux, Qvx, Qvu
    elif output_format == 'matrix':
        outputs = np.block([[Qxx, Qux.T, Qvx.T], [Qux, Quu, Qvu.T],
                            [Qvx, Qvu, Qvv]])
        # ABC = np.hstack([A, B, C])
        # X = sla.block_diag(Q, R, -S)
        # Y = mdot(ABC.T, P, ABC)
        # Z = sla.block_diag(APAi, BPBj, CPCk)
        # outputs = X + Y + Z
    return outputs
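svec2/smat2 are not shown in this listing. For the regression target c = z' H z = svec2(H) . svec2(z z') used above to be exact, any symmetric vectorization whose dot product equals the Frobenius inner product works, e.g. the upper triangle with off-diagonal entries scaled by sqrt(2). A sketch under that assumption:

import numpy as np

def svec2(M):
    """Upper triangle of symmetric M, off-diagonals scaled by sqrt(2), so that
    svec2(A) @ svec2(B) == np.trace(A @ B) for symmetric A and B."""
    iu = np.triu_indices(M.shape[0])
    scale = np.where(iu[0] == iu[1], 1.0, np.sqrt(2.0))
    return scale * M[iu]

def smat2(v):
    """Inverse of svec2."""
    n = int((np.sqrt(8 * v.size + 1) - 1) / 2)
    iu = np.triu_indices(n)
    M = np.zeros([n, n])
    M[iu] = v / np.where(iu[0] == iu[1], 1.0, np.sqrt(2.0))
    return M + np.triu(M, 1).T

z = np.array([1.0, 2.0, 3.0])
H = np.diag([1.0, 2.0, 3.0]) + 0.1       # symmetric test matrix
assert np.isclose(z @ H @ z, svec2(H) @ svec2(np.outer(z, z)))
assert np.allclose(smat2(svec2(H)), H)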
    def get_buffer_size(self):
        """
        Returns the size of the measurement history buffer & corresponding 
        observability matrix
        """
        n = self.n
        m = self.m
        ell = self.ell
        F = self.F
        H = self.H
        Mo = 0
        p = 0
        while la.matrix_rank(Mo) < n:  # valid only if the system is observable
            p += 1
            Mo = self.obsvp(p)
        Mopi = la.pinv(Mo)

        # Build the stacked H matrix as Mw
        if p > 1:
            Mw = np.zeros([m * p, ell * (p - 1)])
            Mw[0:m, 0:ell] = np.copy(H)
            for j in range(1, p - 1):
                Mw[0:m, ell * j:ell * (j + 1)] = mdot(
                    Mw[0:m, ell * (j - 1):ell * j], F)
            for i in range(1, p - 1):
                Mw[m * i:m * (i + 1), ell * i:] = Mw[0:m, 0:ell * (p - i - 1)]

            # Product matrix: Mopi*Mw
            MopiMw = mdot(Mopi, Mw)

            # Form the 'script' A matrix as Anew - Aold
            Anew = np.hstack([MopiMw, np.eye(ell)])
            Aold = np.hstack([np.zeros([ell, ell]), mdot(F, MopiMw)])
            A = Anew - Aold

            # Form the 'script' B matrix as Bnew - Bold
            Bnew = np.hstack([Mopi, np.zeros([ell, m])])
            Bold = np.hstack([np.zeros([ell, m]), mdot(F, Mopi)])
            B = Bnew - Bold
        else:
            A = np.eye(ell)
            B = np.hstack([Mopi, -mdot(F, Mopi)])

        # Get the A_i and B_i sequences
        Ai = np.zeros([p, ell, ell])
        Bi = np.zeros([p + 1, ell, m])
        for i in range(p + 1):
            if i > 0:
                Ai[p - i] = A[:, ell * (i - 1):ell * i]
            Bi[p - i] = B[:, m * i:m * (i + 1)]

        # Form Kronecker products of the A_i's and B_i's
        kronA = np.zeros([ell**2, ell**2])
        kronB = np.zeros([ell**2, m**2])
        for i in range(p + 1):
            if i < p:
                kronA += np.kron(Ai[i], Ai[i])
            kronB += np.kron(Bi[i], Bi[i])

        return p, Mopi, kronA, kronB
Example #16
    def calc_QK(self):
        self._QK = self.Q + mdot(self.K.T, self.R, self.K)
        return self._QK
Example #17
    # Define the true and nominal dynamics and LQR costs
    n, m, A_true, B_true, Q, R, A, B = system_inverted_pendulum()

    # Define uncertainty directions and magnitudes
    p = 1
    Ai = np.zeros([p, n, n])
    Ai[0, 1, 0] = 1
    eta_bar = np.ones(p)

    # Compute robust optimal gains
    K1, eta1 = algo1(A, B, Ai, Q, R, eta_bar)
    K2, eta2 = algo2(A, B, Ai, Q, R, eta_bar)

    # Compute the certainty-equivalent control
    Pce = dare(A, B, Q, R)
    Kce = -la.solve((R + mdot(B.T, Pce, B)), mdot(B.T, Pce, A))


    def check_random_systems(K, eta, R, bound_type):
        # NB: here R is the number of random samples to draw, not the cost matrix
        s = np.zeros(R)
        if bound_type == "unidirectional":
            r = np.linspace(0, 1, R)
        elif bound_type == "bidirectional":
            r = np.linspace(-1, 1, R)
        for j in range(R):
            Arand = np.copy(A)
            for i in range(p):
                Arand += eta[i]*r[j]*Ai[i]
            s[j] = specrad(Arand + B_true@K)
        return s
Example #18
def estimate_model(n,
                   m,
                   A,
                   B,
                   SigmaA,
                   SigmaB,
                   nr,
                   ell,
                   x_hist,
                   u_mean_hist,
                   u_covr_hist,
                   display_estimates=False,
                   AB_known=False):
    muhat_hist = np.zeros([ell + 1, n])
    Xhat_hist = np.zeros([ell + 1, n * n])
    What_hist = np.zeros([ell + 1, n * m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell + 1):
            muhat_hist[t] = (1 / nr) * np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1 / nr) * vec(
                np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j',x_hist[t],u_mean_hist[t]),axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])
        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T
        # Split learned model parameters
        Ahat = Thetahat[:, 0:n]
        Bhat = Thetahat[:, n:n + m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n * n]).T
    Uhat_hist = np.zeros([ell, m * m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] +
                           np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat, Xhat_hist[t]) + mdot(BAhat, What_hist[t]) + mdot(
            ABhat, What_hist[t].T) + mdot(BBhat, Uhat_hist[t])
        C[:, t] = Xhat_hist[t + 1] - Cminus
    D = np.vstack([Xhat_hist[0:-1].T, Uhat_hist.T])
    # Solve least-squares problem
    # SigmaThetahat_prime = mdot(C, D.T, la.pinv(mdot(D,D.T)))
    SigmaThetahat_prime = la.lstsq(D.T, C.T, rcond=None)[0].T
    # Split learned model parameters
    SigmaAhat_prime = SigmaThetahat_prime[:, 0:n * n]
    SigmaBhat_prime = SigmaThetahat_prime[:, n * n:n * (n + m)]

    # Reshape and project the noise covariance estimates onto the semidefinite cone
    SigmaAhat = reshaper(SigmaAhat_prime, n, n, n, n)
    SigmaBhat = reshaper(SigmaBhat_prime, n, m, n, m)
    SigmaAhat = positive_semidefinite_part(SigmaAhat)
    SigmaBhat = positive_semidefinite_part(SigmaBhat)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A   ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B   ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA   ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB   ")

    return Ahat, Bhat, SigmaAhat, SigmaBhat
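The first stage fits (Ahat, Bhat) to the mean dynamics mu_{t+1} = A mu_t + B u_t by least squares over the rollout horizon. A noiseless standalone sketch of just that regression:

import numpy as np
import numpy.linalg as la
import numpy.random as npr

npr.seed(0)
n, m, ell = 2, 1, 50
A = np.array([[0.8, 0.2], [0.0, 0.7]])
B = np.array([[0.0], [1.0]])

u_mean_hist = npr.randn(ell, m)
muhat_hist = np.zeros([ell + 1, n])
muhat_hist[0] = npr.randn(n)
for t in range(ell):
    muhat_hist[t + 1] = A @ muhat_hist[t] + B @ u_mean_hist[t]

# Regress mu_{t+1} on [mu_t; u_t]
Y = muhat_hist[1:].T
Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])
Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T
Ahat, Bhat = Thetahat[:, 0:n], Thetahat[:, n:n + m]
assert np.allclose(Ahat, A) and np.allclose(Bhat, B)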
    def run(self, T):
        """
        Perform adaptive Kalman filter iterations
        Input Parameters:
        T: Integer of maximum filter iterations
        """
        F = self.F
        H = self.H
        Q = self.Q
        R = self.R
        n = self.n
        m = self.m
        p = self.p
        Mopi = self.Mopi
        x_mean0 = self.x_mean0
        x_covr0 = self.x_covr0
        Q_est0 = self.Q_est0
        R_est0 = self.R_est0
        L0 = self.L0

        # Preallocate history data arrays
        x_hist = np.full((T + 1, n), np.nan)
        y_hist = np.full((T + 1, m), np.nan)
        x_pre_hist = np.full((T + 1, n), np.nan)
        x_post_hist = np.full((T + 1, n), np.nan)
        P_pre_hist = np.full((T + 1, n, n), np.nan)
        P_post_hist = np.full((T + 1, n, n), np.nan)
        K_hist = np.full((T + 1, n, m), np.nan)
        Q_est_hist = np.full((T + 1, n, n), np.nan)
        R_est_hist = np.full((T + 1, m, m), np.nan)

        # Initialize the iterates
        x_post = x_mean0
        P_post = x_covr0
        Q_est = Q_est0
        R_est = R_est0
        L = L0
        x = npr.multivariate_normal(x_mean0, x_covr0)
        x_hist[0] = x
        x_post_hist[0] = x_post
        P_post_hist[0] = P_post

        # Perform dynamic adaptive Kalman filter updates
        for k in range(T):
            # Print the iteration number
            print("k = %9d / %d" % (k + 1, T))
            # Generate a new multivariate Gaussian measurement noise
            v = npr.multivariate_normal(np.zeros(m), R)
            # Collect and store a new measurement
            y = mdot(H, x) + v
            y_hist[k] = y
            # Noise covariance estimation
            if k > p - 1:
                # Stack the p most recent measurements up to time step k-1
                Yold = vec(y_hist[np.arange(k - 1, k - p - 1, -1)].T)

                # Stack the p most recent measurements up to time step k
                Ynew = vec(y_hist[np.arange(k, k - p, -1)].T)

                # Formulate a linear stationary time series
                Z = mdot(Mopi, Ynew) - mdot(F, Mopi, Yold)

                # Recursive covariance unbiased estimator
                L = ((k - 1) / k) * L + (1 / k) * np.outer(Z, Z)

                # Get the least squares estimate of selected covariances
                Q_est_new, R_est_new = self.lse(L)

                # Check positive semidefiniteness of estimated covariances
                if is_pos_def(Q_est_new):
                    Q_est = np.copy(Q_est_new)
                if is_pos_def(R_est_new):
                    R_est = np.copy(R_est_new)

            # Update the covariance estimate history
            Q_est_hist[k] = Q_est
            R_est_hist[k] = R_est

            ## Update state estimates using standard Kalman filter equations
            # Calculate the a priori state estimate
            x_pre = mdot(F, x_post)

            # Calculate the a priori error covariance estimate
            P_pre = mdot(F, P_post, F.T) + Q_est

            # Calculate the Kalman gain
            K = solveb(mdot(P_pre, H.T), mdot(H, P_pre, H.T) + R_est)

            # Calculate the a posteriori state estimate
            x_post = x_pre + mdot(K, y - mdot(H, x_pre))

            # Calculate the a posteriori error covariance estimate
            IKH = np.eye(n) - mdot(K, H)
            P_post = mdot(IKH, P_pre, IKH.T) + mdot(K, R, K.T)

            # Store the histories
            x_pre_hist[k + 1] = x_pre
            x_post_hist[k + 1] = x_post
            P_pre_hist[k + 1] = P_pre
            P_post_hist[k + 1] = P_post
            K_hist[k + 1] = K

            # True system updates (true state transition and measurement)
            # Generate process noise
            w = npr.multivariate_normal(np.zeros(n), Q)

            # Update and store the state
            x = mdot(F, x) + w
            x_hist[k + 1] = x

        # Tie up loose ends
        y_hist[-1] = y
        x_pre_hist[0] = x_post_hist[0]
        P_pre_hist[0] = P_post_hist[0]
        K_hist[0] = K_hist[1]
        Q_est_hist[-1] = Q_est
        R_est_hist[-1] = R_est

        # Return data history
        data_hist = DataHist(T, Q_est_hist, R_est_hist, x_hist, x_pre_hist,
                             x_post_hist, P_pre_hist, P_post_hist)
        return data_hist
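The update L = ((k-1)/k) L + (1/k) Z Z' above is a running average of outer products, i.e. the batch sample covariance of the zero-mean sequence Z computed recursively. A quick standalone check of that equivalence:

import numpy as np
import numpy.random as npr

npr.seed(0)
Zs = npr.randn(500, 3)                  # zero-mean sample sequence

L = np.zeros([3, 3])
for k in range(1, Zs.shape[0] + 1):
    L = ((k - 1) / k) * L + (1 / k) * np.outer(Zs[k - 1], Zs[k - 1])

assert np.allclose(L, (Zs.T @ Zs) / Zs.shape[0])   # matches the batch estimate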
Example #20
def estimate_model_var_only(n,
                            m,
                            A,
                            B,
                            SigmaA,
                            SigmaB,
                            varAi,
                            varBj,
                            Ai,
                            Bj,
                            nr,
                            ell,
                            x_hist,
                            u_mean_hist,
                            u_covr_hist,
                            display_estimates=False,
                            AB_known=False,
                            detailed_outputs=True):
    muhat_hist = np.zeros([ell + 1, n])
    Xhat_hist = np.zeros([ell + 1, n * n])
    What_hist = np.zeros([ell + 1, n * m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell + 1):
            muhat_hist[t] = (1 / nr) * np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1 / nr) * vec(
                np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j',x_hist[t],u_mean_hist[t]),axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])
        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T
        # Split learned model parameters
        Ahat = Thetahat[:, 0:n]
        Bhat = Thetahat[:, n:n + m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n * n]).T
    Uhat_hist = np.zeros([ell, m * m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] +
                           np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat, Xhat_hist[t]) + mdot(BAhat, What_hist[t]) + mdot(
            ABhat, What_hist[t].T) + mdot(BBhat, Uhat_hist[t])
        C[:, t] = Xhat_hist[t + 1] - Cminus

    C = vec(C)

    X1 = Xhat_hist[0:-1].T
    U1 = Uhat_hist.T

    D1 = np.vstack([
        vec(np.dot(np.kron(Ai[i], Ai[i]), X1)) for i in range(np.size(varAi))
    ])
    D2 = np.vstack([
        vec(np.dot(np.kron(Bj[j], Bj[j]), U1)) for j in range(np.size(varBj))
    ])

    D = np.vstack([D1, D2])

    # Solve least-squares problem
    # var_hat = mdot(C, D.T, la.pinv(mdot(D,D.T)))
    # var_hat = mdot(la.pinv(mdot(D, D.T)), D, C)
    var_hat = la.lstsq(D.T, C, rcond=None)[0]

    varAi_hat = np.maximum(var_hat[0:np.size(varAi)], 0)
    varBj_hat = np.maximum(var_hat[np.size(varAi):], 0)

    SigmaAhat = np.sum([
        varAi_hat[i] * np.outer(vec(Ai[i]), vec(Ai[i]))
        for i in range(np.size(varAi))
    ],
                       axis=0)
    SigmaBhat = np.sum([
        varBj_hat[j] * np.outer(vec(Bj[j]), vec(Bj[j]))
        for j in range(np.size(varBj))
    ],
                       axis=0)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A   ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B   ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA   ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB   ")

    if detailed_outputs:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat, varAi_hat, varBj_hat
    else:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat
    return outputs
Example #21
def dlyap_mult(A,
               B,
               K,
               a,
               Aa,
               b,
               Bb,
               Q,
               R,
               S0,
               matrixtype='P',
               algo='iterative',
               show_warn=False,
               check_pd=False,
               P00=None,
               S00=None):
    n = A.shape[1]
    n2 = n * n
    p = len(a)
    q = len(b)
    AK = A + np.dot(B, K)
    stable = True
    stable2 = True
    if algo == 'linsolve':
        if matrixtype == 'P':
            # Intermediate terms
            Aunc_P = np.zeros([n2, n2])
            for i in range(p):
                Aunc_P = Aunc_P + a[i] * kron(Aa[:, :, i].T)
            BKunc_P = np.zeros([n2, n2])
            for j in range(q):
                BKunc_P = BKunc_P + b[j] * kron(np.dot(K.T, Bb[:, :, j].T))
            # Compute matrix and vector for the linear equation solver
            Alin_P = np.eye(n2) - kron(AK.T) - Aunc_P - BKunc_P
            blin_P = vec(Q) + np.dot(kron(K.T), vec(R))
            # Solve linear equations
            xlin_P = la.solve(Alin_P, blin_P)
            # Reshape
            P = np.reshape(xlin_P, [n, n])
            if check_pd:
                stable = is_pos_def(P)
        elif matrixtype == 'S':
            # Intermediate terms
            Aunc_S = np.zeros([n2, n2])
            for i in range(p):
                Aunc_S = Aunc_S + a[i] * kron(Aa[:, :, i])
            BKunc_S = np.zeros([n2, n2])
            for j in range(q):
                BKunc_S = BKunc_S + b[j] * kron(np.dot(Bb[:, :, j], K))
            # Compute matrix and vector for the linear equation solver
            Alin_S = np.eye(n2) - kron(AK) - Aunc_S - BKunc_S
            blin_S = vec(S0)
            # Solve linear equations
            xlin_S = la.solve(Alin_S, blin_S)
            # Reshape
            S = np.reshape(xlin_S, [n, n])
            if check_pd:
                stable = is_pos_def(S)
        elif matrixtype == 'PS':
            P = dlyap_mult(A,
                           B,
                           K,
                           a,
                           Aa,
                           b,
                           Bb,
                           Q,
                           R,
                           S0,
                           matrixtype='P',
                           algo='linsolve')
            S = dlyap_mult(A,
                           B,
                           K,
                           a,
                           Aa,
                           b,
                           Bb,
                           Q,
                           R,
                           S0,
                           matrixtype='S',
                           algo='linsolve')

    elif algo == 'iterative':
        # Implicit iterative solution to the generalized discrete Lyapunov equation
        # Inspired by https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7553367
        # In turn inspired by https://www.sciencedirect.com/science/article/pii/089812219500119J
        # Faster for large systems, i.e. > 50 states
        # Options
        max_iters = 1000
        epsilon_P = 1e-5
        epsilon_S = 1e-5
        # Initialize
        if matrixtype == 'P' or matrixtype == 'PS':
            if P00 is None:
                P = np.copy(Q)
            else:
                P = P00
        if matrixtype == 'S' or matrixtype == 'PS':
            if S00 is None:
                S = np.copy(S0)
            else:
                S = S00
        iterc = 0
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                P_prev = P
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, P, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, P, Bb[:, :, j],
                                          K)
                AAP = AK.T
                QQP = sympart(Q + mdot(K.T, R, K) + APAunc + BPBunc)
                P = dlyap(AAP, QQP)
                if np.any(np.isnan(P)) or np.any(
                        np.isinf(P)) or not is_pos_def(P):
                    stable = False
                try:
                    converged_P = la.norm(P - P_prev, 2) / la.norm(
                        P_prev, 2) < epsilon_P
                    stable2 = True
                except Exception:
                    # la.norm raises when P contains NaN/inf entries
                    stable2 = False
            if matrixtype == 'S' or matrixtype == 'PS':
                S_prev = S
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], S, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, S, K.T, Bb[:, :, j].T)
                AAS = AK
                QQS = sympart(S0 + ASAunc + BSBunc)
                S = dlyap(AAS, QQS)
                if np.any(np.isnan(S)) or not is_pos_def(S):
                    stable = False
                converged_S = la.norm(S - S_prev, 2) / la.norm(S,
                                                               2) < epsilon_S
            # Check for stopping condition
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            if iterc >= max_iters:
                stable = False
            else:
                iterc += 1
            stop = converged or not stable or not stable2
    #        print('\ndlyap iters = %s' % str(iterc))

    elif algo == 'finite_horizon':
        P = np.copy(Q)
        Pt = np.copy(Q)
        S = np.copy(S0)
        St = np.copy(S0)
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, Pt, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, Pt, Bb[:, :, j],
                                          K)
                Pt = mdot(AK.T, Pt, AK) + APAunc + BPBunc
                P += Pt
                converged_P = np.abs(Pt).sum() < 1e-15
                stable = np.abs(P).sum() < 1e10
            if matrixtype == 'S' or matrixtype == 'PS':
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], St, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, St, K.T,
                                          Bb[:, :, j].T)
                St = mdot(AK, St, AK.T) + ASAunc + BSBunc
                S += St
                converged_S = np.abs(St).sum() < 1e-15
                stable = np.abs(S).sum() < 1e10
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            stop = converged or not stable
    if not stable:
        P = None
        S = None
        if show_warn:
            warnings.simplefilter('always', UserWarning)
            warn('System is possibly not mean-square stable')
    if matrixtype == 'P':
        return P
    elif matrixtype == 'S':
        return S
    elif matrixtype == 'PS':
        return P, S
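For reference, the 'linsolve' branch above is the vectorization identity vec(X S Y') = kron(X, Y) vec(S) applied to the generalized Lyapunov fixed point. A standalone sketch of the 'P' case with plain NumPy (two-argument np.kron, unlike the single-argument kron helper used above):

import numpy as np
import numpy.linalg as la

n = 2
AK = np.array([[0.9, 0.1], [0.0, 0.8]])          # closed-loop A + B K
a = [0.05]
Aa = [np.array([[1.0, 0.0], [0.0, 0.0]])]
QK = np.eye(n)                                   # stands in for Q + K'RK

# (I - kron(AK', AK') - sum_i a_i kron(Aa_i', Aa_i')) vec(P) = vec(QK)
Alin = np.eye(n * n) - np.kron(AK.T, AK.T) \
       - sum(ai * np.kron(Ai.T, Ai.T) for ai, Ai in zip(a, Aa))
P = la.solve(Alin, QK.flatten()).reshape(n, n)
resid = AK.T @ P @ AK + sum(ai * Ai.T @ P @ Ai for ai, Ai in zip(a, Aa)) + QK - P
assert np.allclose(resid, np.zeros([n, n]), atol=1e-10)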