from ltimult import LQRSysMult

if __name__ == "__main__":
    # Script entry point: build a small 3-state, 2-input LQR problem with one
    # multiplicative noise term on A and one on B, then set a zero gain.

    # System problem data
    # A is 3x3 and B is 3x2 (state and input matrices passed to LQRSysMult).
    A = np.array([[0.7, 0.3, 0.2], [-0.2, 0.4, 0.5], [-0.4, 0.2, -0.3]])
    B = np.array([[0.5, -0.3], [0.8, 0.3], [0.1, 0.9]])
    # Identity matrices for Q, R and S0.
    # NOTE(review): in the sibling generators of this file Q/R are labelled
    # "LQR cost matrices" and S0 "initial state distribution covariance".
    Q = np.eye(3)
    R = np.eye(2)
    S0 = np.eye(3)
    # Noise direction matrices; the trailing np.newaxis turns each 2-D matrix
    # into a 3-D stack holding exactly one direction (last axis has length 1).
    Aa = 0.1 * np.array([[2, 9, -6], [9, 9, 4], [-9, -2, 5]])
    Aa = Aa[:, :, np.newaxis]
    Bb = 0.1 * np.array([[8, 8], [3, 3], [-6, 6]])
    Bb = Bb[:, :, np.newaxis]
    # One scalar per noise direction, stored as 1x1 arrays.
    # NOTE(review): presumably these are the noise variances -- confirm
    # against LQRSysMult.
    a = np.array([[0.1]])
    b = np.array([[0.1]])
    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    # Start with an initially stabilizing (feasible) controller;
    # for this example the system is open-loop mean-square stable
    # (hence the all-zero gain of shape [SS.m, SS.n] is used).
    SS.setK(np.zeros([SS.m, SS.n]))

    # Policy gradient options
    PGO = PolicyGradientOptions(epsilon=(1e-2) * SS.Kare.size,
                                eta=1e-3,
                                max_iters=1000,
                                disp_stride=1,
                                keep_hist=True,
                                opt_method='proximal',
                                keep_opt='last',
                                step_direction='gradient',
                                stepsize_method='constant',
def gen_system_mult(n=8,m=8,safety_margin=0.3,noise='weak',
                    mult_noise_method='random',SStype='ER',
                    seed=None,saveSS=True):
    """Generate an LQR system with multiplicative noise, optionally saving it.

    Parameters
    ----------
    n, m : int
        Requested state / input dimensions.  Ignored for
        ``SStype='example'``; for ``SStype='ER'`` ``m`` is replaced by the
        number of columns of the generated ``B``.
    safety_margin : float
        Forwarded to ``gen_system_AB_rand`` (``SStype='random'`` only).
    noise : str
        How the noise magnitudes ``a``/``b`` are scaled (not used for
        ``SStype='example'``):

        * ``'critical'``: grown in 5% steps until ``dare_mult`` returns no
          gain, then shrunk by one step.
        * ``'weak'``: as ``'critical'``, then multiplied by 0.90.
        * ``'olmss_critical'`` / ``'olmss_weak'``: same procedure, but the
          test is ``dlyap_mult`` with a zero gain returning ``None``.
        * ``'olmsus'``: grown until ``dlyap_mult`` with a zero gain returns
          ``None`` and left there (no step back).
        * ``'none'``: all magnitudes set to zero (testing only).
    mult_noise_method : str
        ``'random'``, ``'rowcol'`` or ``'random_plus_rowcol'``; selects how
        the noise direction stacks ``Aa``/``Bb`` are built (not used for
        ``SStype='example'``).
    SStype : str
        ``'random'``, ``'ER'`` or ``'example'`` (a fixed 2-state system).
    seed : int or None
        If not ``None``, passed to ``set_rng_seed`` before any random draw.
    saveSS : bool
        If true, set ``SS.dirname`` and pickle the system to
        ``systems/<timestamp>/system_init.pickle``.  This flag is now also
        honored for ``SStype='example'``, which previously always saved.

    Returns
    -------
    LQRSysMult
        The generated system.

    Raises
    ------
    ValueError
        If ``SStype`` is unknown, or (for non-example systems) if
        ``mult_noise_method`` or ``noise`` is unknown.  Raised before any
        system is generated or written to disk.
    """
    # Fail fast on bad options instead of hitting an unbound local later
    if SStype not in ('random','ER','example'):
        raise ValueError('Invalid SStype chosen: %r' % (SStype,))
    if SStype != 'example':
        if mult_noise_method not in ('random','rowcol','random_plus_rowcol'):
            raise ValueError('Invalid mult_noise_method chosen: %r'
                             % (mult_noise_method,))
        if noise not in ('weak','critical','olmss_weak','olmss_critical',
                         'olmsus','none'):
            raise ValueError('Invalid noise setting chosen')

    timestr = str(time()).replace('.','p')
    dirname_out = os.path.join('systems',timestr)

    if seed is not None:
        set_rng_seed(seed)

    if SStype == 'example':
        # Fixed 2-state, 1-input system with one noise direction on A and B
        A = np.array([[0.8,0.3],[-0.2,0.7]])
        B = np.array([[0.5,0.3]]).T
        Q = np.eye(2)
        R = np.eye(1)
        S0 = np.eye(2)
        Aa = np.array([[0.2,0.3],[0.2,0.3]])[:,:,np.newaxis]
        Bb = np.array([[0.2,0.3]]).T[:,:,np.newaxis]
        a = np.array([[0.3]])
        b = np.array([[0.3]])
        SS = LQRSysMult(A,B,a,Aa,b,Bb,Q,R,S0)
        if saveSS:
            SS.dirname = dirname_out
            filename_out = os.path.join(dirname_out,'system_init.pickle')
            pickle_export(dirname_out, filename_out, SS)
        return SS

    if SStype == 'random':
        A,B = gen_system_AB_rand(n,m,safety_margin)
    else:  # 'ER'
        A,B = gen_system_AB_erdos_renyi(n,dirname_out=dirname_out)
        m = B.shape[1]

    # LQR cost matrices
    Q = np.eye(n)
    R = np.eye(m)

    # Initial state distribution covariance
    S0 = np.eye(n)

    # Multiplicative noise data
    p = 2  # Number of multiplicative noises on A
    q = 2  # Number of multiplicative noises on B

    # NOTE: the order of the random draws below is kept exactly as before so
    # that a given seed reproduces the same system.
    if mult_noise_method == 'random':
        Aa = randn(n,n,p)
        Bb = randn(n,m,q)
    else:
        if mult_noise_method == 'rowcol':
            Aa = np.zeros([n,n,p])
            Bb = np.zeros([n,m,q])
        else:  # 'random_plus_rowcol'
            Aa = 0.3*randn(n,n,p)
            Bb = 0.3*randn(n,m,q)
        # Pick a random row and column and fill them with ones
        Aa[randint(n),:,0] = np.ones(n)
        Aa[:,randint(n),1] = np.ones(n)
        Bb[randint(n),:,0] = np.ones(m)
        Bb[:,randint(m),1] = np.ones(n)

    incval = 1.05
    decval = 1.00*(1/incval)
    weakval = 0.90

    a = np.ones([p,1])
    b = np.ones([q,1])
    a = a*(float(1)/(p*n**2))  # scale as rough heuristic
    b = b*(float(1)/(q*m**2))  # scale as rough heuristic

    if noise == 'none':
        print('MULTIPLICATIVE NOISE SET TO ZERO!!!')
        a = np.zeros([p,1])  # For testing only - no noise
        b = np.zeros([q,1])  # For testing only - no noise
    else:
        if noise in ('weak','critical'):
            # Closed-loop test: dare_mult yields a gain while solvable
            def still_stable(a_cur,b_cur):
                _,Kare = dare_mult(A,B,a_cur,Aa,b_cur,Bb,Q,R,show_warn=False)
                return Kare is not None
        else:
            # Open-loop test: dlyap_mult under a zero gain
            K0 = np.zeros([m,n])

            def still_stable(a_cur,b_cur):
                P = dlyap_mult(A,B,K0,a_cur,Aa,b_cur,Bb,Q,R,S0,matrixtype='P')
                return P is not None

        a,b = _inflate_noise_until_unstable(a,b,still_stable,incval)

        if noise != 'olmsus':
            # Step back once to regain a small stability margin
            a = decval*a
            b = decval*b
        if noise in ('weak','olmss_weak'):
            a = weakval*a
            b = weakval*b

    SS = LQRSysMult(A,B,a,Aa,b,Bb,Q,R,S0)

    if saveSS:
        SS.dirname = dirname_out
        filename_out = os.path.join(dirname_out,'system_init.pickle')
        pickle_export(dirname_out, filename_out, SS)

    return SS


def _inflate_noise_until_unstable(a,b,still_stable,incval):
    """Multiply ``a`` and ``b`` by ``incval`` until ``still_stable`` fails.

    ``still_stable(a, b)`` is evaluated once before the first scaling and
    once after every scaling; the first pair for which it returns false is
    returned.
    """
    while still_stable(a,b):
        a = incval*a
        b = incval*b
    return a,b
# Example #3
# 0
def gradient_estimate_variance(noise, textfile, seed=1,
                               n_iterc=10, nt=40, nr=int(1e4), ru=1e-2):
    """Compare zeroth-order gradient estimates against ``SS.grad``.

    A fixed 2-state, 1-input system is built, the gain is set to zero, and
    ``n_iterc`` independent gradient estimates are formed from ``nr``
    simulated rollouts of length ``nt`` each, using random gain
    perturbations of Frobenius norm ``ru``.  One line of error statistics
    per estimate, followed by their means and the average time per
    estimate, is written through ``printout(..., textfile)``.

    Parameters
    ----------
    noise : bool
        If truthy, use ``LQRSysMult`` and inject multiplicative noise into
        the rollouts; otherwise use ``LQRSys``.
    textfile
        Forwarded unchanged as the second argument of ``printout``.
    seed : int
        Seed passed to ``npr.seed``.
    n_iterc : int
        Number of gradient estimates to collect (previously fixed at 10).
    nt : int
        Rollout length (previously fixed at 40).
    nr : int
        Number of rollouts per estimate (previously fixed at 1e4).
    ru : float
        Exploration radius (previously fixed at 1e-2).

    Returns
    -------
    tuple
        ``(G_act, G_est_all, error_angle_all, error_scale_all,
        error_norm_all)``.

    Raises
    ------
    ValueError
        If ``n_iterc`` or ``nr`` is smaller than 1, or ``ru`` is not
        positive.
    """
    if n_iterc < 1 or nr < 1:
        raise ValueError('n_iterc and nr must be at least 1')
    if ru <= 0:
        raise ValueError('ru must be positive')

    npr.seed(seed)
    # Generate the system
    # Two states, diffusion w/ friction and multiplicative noise
    n = 2
    m = 1
    A = np.array([[0.8, 0.1], [0.1, 0.8]])
    B = np.array([[1.0], [0.0]])
    a = np.array([[0.1]])
    Aa = np.array([[0.0, 1.0], [1.0, 0.0]])[:, :, np.newaxis]
    b = np.array([[0.0]])
    Bb = np.array([[0.0], [0.0]])[:, :, np.newaxis]
    Q = np.eye(2)
    R = np.eye(1)
    S0 = np.eye(2)

    if noise:
        SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    else:
        SS = LQRSys(A, B, Q, R, S0)

    # Initialize at the zero gain
    K0 = np.zeros([m, n])
    SS.setK(K0)
    K = np.copy(SS.K)

    print(SS.c)

    # Reference gradient.  Nothing below modifies SS, so it is read once
    # here rather than once per estimate.
    # NOTE(review): assumes reading SS.grad has no side effects -- confirm.
    G_act = SS.grad
    G_act_norm = la.norm(G_act)

    G_est_all = np.zeros([n_iterc, m, n])
    error_angle_all = np.zeros(n_iterc)
    error_scale_all = np.zeros(n_iterc)
    error_norm_all = np.zeros(n_iterc)

    headerstr_list = []
    headerstr_list.append('    trial ')
    headerstr_list.append('error angle (deg)')
    headerstr_list.append('  error scale ')
    headerstr_list.append(' error norm')
    headerstr_list.append('true gradient norm')
    headerstr = " | ".join(headerstr_list)
    printout(headerstr, textfile)

    t_start = time()

    for iterc in range(n_iterc):
        # Estimate gradient using zeroth-order optimization

        # Draw random gain deviations and scale to Frobenius norm ball
        Uraw = npr.normal(size=[nr, SS.m, SS.n])
        U = ru * Uraw / la.norm(Uraw, 'fro', axis=(1, 2))[:, None, None]

        # One perturbed gain per rollout, stacked along axis 0
        Kd = K + U

        # Simulate all rollouts together
        c = np.zeros(nr)

        # Draw random initial states
        x = npr.multivariate_normal(np.zeros(SS.n), SS.S0, nr)

        # Noise-free closed-loop matrices; constant over the rollout
        AK_nominal = SS.A + np.einsum('...ik,...kj', SS.B, Kd)

        for t in range(nt):
            # Accumulate cost
            # NOTE(review): SS.QK is used for every rollout; presumably it
            # is built from the unperturbed gain K rather than Kd -- confirm
            # this is intended.
            c += np.einsum('...i,...i', x, np.einsum('jk,...k', SS.QK, x))

            if noise:
                # Fresh copy: the noise terms are accumulated in place.
                # Broadcasting over the rollout axis replaces the explicit
                # np.repeat copies; the values are identical.
                AKr = AK_nominal.copy()
                for i in range(SS.p):
                    AKr += ((SS.a[i]**0.5)
                            * npr.randn(nr)[:, np.newaxis, np.newaxis]
                            * SS.Aa[np.newaxis, :, :, i])
                for j in range(SS.q):
                    B_pert = ((SS.b[j]**0.5)
                              * npr.randn(nr)[:, np.newaxis, np.newaxis]
                              * SS.Bb[np.newaxis, :, :, j])
                    AKr += np.einsum('...ik,...kj', B_pert, Kd)
            else:
                AKr = AK_nominal

            # Transition the state
            x = np.einsum('...jk,...k', AKr, x)

        # Estimate gradient
        G_est = np.einsum('i,i...', c, U)
        G_est *= K.size / (nr * (ru**2))
        G_est_norm = la.norm(G_est)

        # Clip so that round-off cannot push the cosine outside [-1, 1],
        # which would make arccos return NaN
        cos_angle = np.clip(np.sum(G_est * G_act) / (G_est_norm * G_act_norm),
                            -1.0, 1.0)
        error_angle = (360 / (2 * np.pi)) * np.arccos(cos_angle)
        error_scale = G_est_norm / G_act_norm
        error_norm = la.norm(G_est - G_act)

        G_est_all[iterc] = G_est
        error_angle_all[iterc] = error_angle
        error_scale_all[iterc] = error_scale
        error_norm_all[iterc] = error_norm

        # Print iterate messages
        printstrlist = []
        printstrlist.append("{0:9d}".format(iterc + 1))
        printstrlist.append("   {0:6.2f} / 360".format(error_angle))
        printstrlist.append("{0:8.4f} / 1".format(error_scale))
        printstrlist.append("{0:9.4f}".format(error_norm))
        printstrlist.append("{0:9.4f}".format(G_act_norm))
        printstr = '  |  '.join(printstrlist)
        printout(printstr, textfile)

    t_end = time()
    printout('', textfile)
    printout('mean of error angle', textfile)
    printout('%f' % np.mean(error_angle_all), textfile)
    printout('mean of error scale', textfile)
    printout('%f' % np.mean(error_scale_all), textfile)
    printout('mean of error norm', textfile)
    printout('%f' % np.mean(error_norm_all), textfile)
    printout('average time per gradient estimate (s)', textfile)
    printout("%.3f" % ((t_end - t_start) / n_iterc), textfile)
    printout('', textfile)

    return G_act, G_est_all, error_angle_all, error_scale_all, error_norm_all
def gen_system_example_suspension():
    """Build, save and return a 4-state, 1-input example with multiplicative noise.

    The continuous-time matrices are assembled from the constants below
    (presumably a two-mass suspension model, going by the function name and
    the mass/spring/damper style constants -- confirm), discretized with
    ``scipy.signal.cont2discrete`` (``dt=0.5``, bilinear method), and
    combined with four noise directions on ``A`` (one all-ones column each)
    and one on ``B``.  The noise magnitudes are then reduced in 5% steps
    until ``SS.ccare`` is finite.

    The system is pickled to ``systems/<timestamp>/system_init.pickle``.

    Returns
    -------
    LQRSysMult
        The generated system, with ``dirname`` set to the output directory.
    """
    n = 4
    m = 1

    # Floats rather than ints so that ratios such as b1/m1 are true
    # divisions even where ``/`` on integers truncates (Python 2).
    m1 = 500.0
    m2 = 100.0
    k1 = 5000.0
    k2 = 20000.0
    b1 = 200.0
    b2 = 4000.0

    A = np.array([[0,1,0,0],
                  [-(b1*b2)/(m1*m2),0,((b1/m1)*((b1/m1)+(b1/m2)+(b2/m2)))-(k1/m1),-(b1/m1)],
                  [b2/m2,0,-((b1/m1)+(b1/m2)+(b2/m2)),1],
                  [k2/m2,0,-((k1/m1)+(k1/m2)+(k2/m2)),0]])

    B = 1000*np.array([[0],
                       [1/m1],
                       [0],
                       [(1/m1)+(1/m2)]])

    C = np.eye(n)
    D = np.zeros([n,m])

    # Continuous -> discrete time
    sysd = scipy.signal.cont2discrete((A,B,C,D),dt=0.5,method='bilinear')
    A = sysd[0]
    B = sysd[1]

    # Multiplicative noise data
    p = 4
    q = 1

    a = 0.1*np.ones(p)
    b = 0.2*np.ones(q)

    # i-th noise direction on A: ones in column i, zeros elsewhere
    Aa = np.zeros([n,n,p])
    for i in range(p):
        Aa[:,i,i] = 1.0

    # j-th noise direction on B: ones in column j, zeros elsewhere
    Bb = np.zeros([n,m,q])
    for j in range(q):
        Bb[:,j,j] = 1.0

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)

    # Ensure mean-square stabilizability: shrink the noise magnitudes by 5%
    # at a time until the system reports a finite ccare.
    SS = LQRSysMult(A,B,a,Aa,b,Bb,Q,R,S0)
    while not (SS.ccare < np.inf):
        a = a*0.95
        b = b*0.95
        SS = LQRSysMult(A,B,a,Aa,b,Bb,Q,R,S0)

    timestr = str(time()).replace('.','p')
    dirname_out = os.path.join('systems',timestr)
    SS.dirname = dirname_out
    filename_out = os.path.join(dirname_out,'system_init.pickle')
    pickle_export(dirname_out, filename_out, SS)
    return SS
def gen_system_erdos_renyi(n,
                           diffusion_constant=1.0,
                           leakiness_constant=0.1,
                           time_constant=0.05,
                           leaky=True,
                           seed=None,
                           detailed_outputs=False,
                           dirname_out='.'):
    """Generate a diffusion system on a random Erdos-Renyi graph and save it.

    A random undirected graph on ``n`` nodes is drawn (edge probability
    ``4 / (n - 1)``, integer edge weights from ``npr.randint(1, 4)``).  The
    continuous-time dynamics are ``-diffusion_constant * laplacian``, minus
    ``leakiness_constant * I`` when ``leaky`` is true, and are discretized
    by forward Euler with step ``time_constant``.  One multiplicative noise
    direction is created per edge on ``A`` and one per input on ``B``.

    Parameters
    ----------
    n : int
        Number of nodes; also the state and input dimension.
    diffusion_constant, leakiness_constant, time_constant : float
        Scalars used as described above.
    leaky : bool
        Whether to subtract the leakage term.
    seed : int or None
        Passed to ``npr.seed``.
    detailed_outputs : bool
        Currently unused; kept for interface compatibility.
    dirname_out : str
        Output directory for the graph plot and the pickled system.  The
        default ``'.'`` means "auto-generate ``systems/<timestamp>``", which
        is what every call did before; any other value is now honored
        instead of being silently overwritten.

    Returns
    -------
    LQRSysMult
        The generated system, with ``dirname`` set to the output directory.
    """
    npr.seed(seed)
    if dirname_out == '.':
        timestr = str(time()).replace('.', 'p')
        dirname_out = os.path.join('systems', timestr)

    mean_degree = 4.0  # should be > 1 for giant component to exist
    # A single node has no possible edges; avoid dividing by zero
    erp = mean_degree / (n - 1.0) if n > 1 else 0.0

    # Create random Erdos-Renyi graph (symmetric weighted adjacency matrix).
    # Draws are made pair by pair, in the same order as before, so a given
    # seed yields the same graph.
    adjacency = np.zeros([n, n])
    edges = []
    for i in range(n):
        for j in range(i + 1, n):
            if npr.rand() < erp:
                weight = npr.randint(low=1, high=4)
                adjacency[i, j] = weight
                adjacency[j, i] = weight
                edges.append((i, j))
    n_edges = len(edges)

    # Degree matrix and graph Laplacian
    degree = np.diag(adjacency.sum(axis=0))
    laplacian = degree - adjacency

    # Continuous-time dynamics matrices
    Ac = -laplacian * diffusion_constant
    # normalize just to make B = np.eye(n) later in discrete-time
    Bc = np.eye(n) / time_constant

    if leaky:
        Ac = Ac - leakiness_constant * np.eye(n)

    # Plot
    visualize_graph_ring(adjacency, n, dirname_out)

    # Forward Euler discretization
    A = np.eye(n) + Ac * time_constant
    B = Bc * time_constant
    m = n

    # Multiplicative noises on A: one per edge, in edge-discovery order.
    # Each direction has +1 on the two diagonal entries of the edge's
    # endpoints and -1 on the two off-diagonal entries.
    a = 0.005 * npr.randint(low=1, high=5, size=n_edges)
    Aa = np.zeros([n, n, n_edges])
    for k, (i, j) in enumerate(edges):
        Aa[i, i, k] = 1
        Aa[j, j, k] = 1
        Aa[i, j, k] = -1
        Aa[j, i, k] = -1

    # Multiplicative noises on B: one per input, acting on entry (i, i)
    b = 0.05 * npr.randint(low=1, high=5, size=n)
    Bb = np.zeros([n, m, m])
    idx = np.arange(n)
    Bb[idx, idx, idx] = 1

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)

    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    SS.dirname = dirname_out
    filename_out = os.path.join(dirname_out, 'system_init.pickle')
    pickle_export(dirname_out, filename_out, SS)
    return SS