Python CartPole示例，CartPole Python示例

示例#1

0

显示文件

def get_random_data_pairs(n,
                          visual=False,
                          stable_equ=False,
                          ext=False,
                          quasi=False,
                          f=False,
                          noise=None):
    # generates n random state initialisations and their change in state
    # stable_equ if generation is only about theta = pi
    # returns 4xn matrices xs and ys for the initial state and change in state respectivvly
    system = CartPole(visual)

    # initial_state = random_init(initial_state,stable_equ)
    # system.setState(initial_state)
    v = 4
    force = 0
    if f:
        v = 5
    initial_state = np.zeros(v)
    xs = np.zeros((v, n))
    ys = np.zeros((4, n))
    seed = 1
    for i in range(n):
        if quasi == False:
            initial_state = random_init(initial_state, stable_equ, ext, f=f)
        else:
            initial_state, seed = quasi_init(seed, f=f)

        system.setState(initial_state[0:4])
        if f:
            force = initial_state[4]
        xs[:, i], ys[:,
                     i] = get_xy_pair(0, system, f=f, force=force,
                                      noise=noise)  ## each column is a sample
    return xs, ys

示例#2

0

显示文件

文件： Non_linear_modelling.py 项目： HarveyHughes/Machine-Learning-3rd-Year-Project

def test_model(c,
               n,
               visual=False,
               loop=False,
               osc=False,
               stable_equ=False,
               model=0,
               f=False):
    # c is 4x4 linear coefficent matrix, or contais alpha and basis location for non linear model
    #  n is number of time iterations to predict
    # stable_equ if motion is about theta = pi
    # plots real state trajectory and linear model predictions
    # model = 0 if linear model
    #modoel = 1 if non linear model
    v = 4
    if f:
        v = 5
    system = CartPole(visual)
    initial_state = np.zeros(v)
    initial_state = random_init(initial_state, stable_equ, f=f)
    initial_state[0] = 0
    if loop == True:
        initial_state = np.array([0, 0, np.pi, 15])
    elif osc == True:
        initial_state = np.array([0, 0, np.pi, 5])

    force = 0
    if f:
        force = initial_state[4]
    system.setState(initial_state[0:4])

    state_history = np.empty((v, n))
    state_history[:, 0] = initial_state[0:v]
    for i in range(n - 1):  # update dynamics n times
        system.performAction(force)  ## runs for 0.1 secs with no force
        system.remap_angle()
        state_history[0:4, i + 1] = system.getState()
        if f:
            state_history[4] = force
    #time = np.arange(0, (n ) * 0.1, 0.1)
    prediction = state_prediction(c, n, initial_state, model, f=f)

    for i in range(4):
        plt.subplot(2, 2, i + 1)
        plt.subplots_adjust(hspace=0.3)
        plt.plot(state_history[i, :], linestyle=':', label='Actual')
        plt.plot(prediction[i, :], label='Predicted')
        plt.xlabel('Iteration')
        plt.title(labels[i])
        plt.legend(loc='upper right')
        if i != 2:
            plt.ylim = (np.min(state_history[i, :]) -
                        0.5 * np.min(state_history[i, :]),
                        np.max(state_history[i, :]) +
                        0.5 * np.max(state_history[i, :]))

        # if i !=0 and i!=2:
        #     plt.ylim(-20, 20)

    plt.show()

示例#3

0

显示文件

文件： task2_2.py 项目： Joycy-xh297/inverted_pendulum

def rollout(max_t, init_state, p, model=None):
    steps = int(max_t / cartpole1.delta_time)  # 0.2s per step
    Xn = init_state[:-1]
    Xn_new = Xn
    if model == None:
        cartpole = CartPole()
        X_cartpole = [Xn[:-1]]
        L_model = 0
        for i in range(steps):
            Xn = Xn_new
            # change the action term according to the policy
            cartpole.setState(Xn[:4])
            action = np.dot(p, Xn)
            cartpole.performAction(action)
            cartpole.remap_angle()
            Xn_new = np.array(cartpole.getState())
            X_cartpole.append(Xn_new)
            L_model += loss(Xn_new)
        return X_cartpole[:-1], L_model
    else:
        X_model = [Xn]
        L_model = 0
        for i in range(steps):
            Xn = Xn_new
            # change the action term according to the policy
            Xn[-1] = np.dot(p, Xn)
            Xn = Xn.reshape(1, Xn.shape[0])
            Yn = predict(Xn, XM, sigma, alpha)
            Yn.resize(Xn.shape)
            Xn_new = Xn + Yn
            Xn_new = np.array(Xn_new[0])
            Xn_new[2] = remap_angle(Xn_new[2])
            X_model.append(Xn_new)
            L_model += loss(Xn_new[:-1])
        return X_model[:-1], L_model

示例#4

0

显示文件

文件： p_noise.py 项目： Joycy-xh297/inverted_pendulum

def rolloutL(p):
    max_t = 15
    state1 = np.array([0, 0, 0.3, 0, 0])
    state2 = np.array([0, 0, 0.15, 0, 0])
    init_state = state1
    cartpole = CartPole()
    steps = int(max_t / cartpole.delta_time)  # 0.2s per step
    Xn = init_state[:-1]
    Xn_new = Xn
    L_model = 0
    for i in range(steps):
        Xn = Xn_new
        cartpole.setState(Xn[:4])
        action = np.dot(p, Xn)
        cartpole.performAction(action)
        cartpole.remap_angle()
        Xn_new = np.array(cartpole.getState())
        n1 = np.random.normal(0, 0.1, 1)[0]
        n2 = np.random.normal(0, 0.1, 1)[0]
        n3 = np.random.normal(0, 0.05, 1)[0]
        n4 = np.random.normal(0, 0.1, 1)[0]
        noise = np.array([n1, n2, n3, n4])
        Xn_new += noise
        L_model += loss(Xn_new)
    return L_model

示例#5

0

显示文件

def loss_function(p):
    model = non_linear_model(10, f=True)
    #model.read_from_file('m=500n=10000')
    model.read_from_file('m=1000n=10000')

    loss = 0
    #initial =np.array([random.normalvariate(0,0.03),random.normalvariate(0,0.03),random.normalvariate(0,0.03),random.normalvariate(0,0.03)]) #+ random.normalvariate(0,0.01)
    initial = np.array([0, 0.2, 0.05, -0.2])  #*10
    n = 20
    loss = loss + loss_pos(initial)
    x = np.zeros((4, 1))
    x[:, 0] = initial[0:4]
    x_extended = np.zeros((5))

    system = CartPole(visual=False)
    system.setState(initial)

    for i in range(n):
        # # using non linear modeelling for training
        change = np.zeros((1, 4))
        if isinstance(p, (list, tuple, np.ndarray)):
            force = np.matmul(p, x)
        else:
            force = np.matmul(p._value, x)

        x_extended[0:4] = x[:, 0]
        x_extended[4] = force
        test = np.matmul(model.alpha.T, model.transform_x(x_extended))
        x = x + test

        # x = system.getState()
        #
        # if isinstance(p, (list, tuple, np.ndarray)):
        #       force = np.matmul(p,x)
        # else:
        #       force = np.matmul(p._value, x)
        #
        # #force = np.matmul(p, x)
        # system.performAction(force)
        # x= system.getState()

        x[2] = remap_angle(x[2])
        loss += loss_pos(x)
    return loss

示例#6

0

显示文件

文件： task2_3.py 项目： Joycy-xh297/inverted_pendulum

def rolloutL(p):
    max_t = 8
    state1 = np.array([0, 0, 0.2, 0, 0])
    state2 = np.array([0, 0, 0.15, 0, 0])
    init_state = state1
    cartpole = CartPole()
    steps = int(max_t / cartpole.delta_time)  # 0.2s per step
    Xn = init_state[:-1]
    Xn_new = Xn
    L_model = 0
    for i in range(steps):
        Xn = Xn_new
        cartpole.setState(Xn[:4])
        action = np.dot(p, Xn)
        cartpole.performAction(action)
        cartpole.remap_angle()
        Xn_new = np.array(cartpole.getState())
        L_model += cartpole.loss()
    return L_model

示例#7

0

显示文件

文件： task2_3.py 项目： Joycy-xh297/inverted_pendulum

def rolloutp(max_t, init_state, p):
    steps = int(max_t / cartpole1.delta_time)  # 0.2s per step
    Xn = init_state[:-1]
    Xn_new = Xn
    cartpole = CartPole()
    X_cartpole = [Xn]
    L_model = 0
    for i in range(steps):
        Xn = Xn_new
        cartpole.setState(Xn[:4])
        action = np.dot(p, Xn)
        cartpole.performAction(action)
        cartpole.remap_angle()
        Xn_new = cartpole.getState()
        X_cartpole.append(np.array(Xn_new))
        L_model += loss(Xn_new)
    X_cartpole = np.array(X_cartpole)
    return X_cartpole[:-1], L_model

示例#8

0

显示文件

文件： linear_mse.py 项目： Joycy-xh297/inverted_pendulum

def gen_train(N, s):
    """try generate the dataset and add noise together"""
    cartpole1 = CartPole()
    X = []  # true value for current state
    XX = []  # true value for next state
    X_wn = []  # observed current state
    Y = []  # true change in state
    Y_wn = []  # observed change in state
    for i in range(N):
        x = np.random.uniform([-5, 5, 1])[0]
        x_dot = np.random.uniform([-10, 10, 1])[0]
        theta = np.random.uniform([-np.pi, np.pi, 1])[0]
        theta_dot = np.random.uniform([-15, 15, 1])[0]
        Xn = np.array([x, x_dot, theta, theta_dot])
        X.append(Xn)
        cartpole1.setState(Xn)
        cartpole1.performAction()
        cartpole1.remap_angle()
        Xn_1 = np.array(cartpole1.getState())
        XX.append(Xn_1)
        Y.append(Xn_1 - Xn)

    X = np.array(X)
    Xnoise = np.random.normal(0, s, size=(N, 4))
    X_wn = X + Xnoise
    XXnoise = np.random.normal(0, s, size=(N, 4))
    XX_wn = XX + XXnoise
    Y = np.array(Y)
    Y_wn = np.array(XX_wn - X_wn)
    # coef = lin_reg(X,Y)
    # coef_wn = lin_reg(X_wn,Y_wn)
    reg1 = LinearRegression().fit(X, Y)
    reg2 = LinearRegression().fit(X_wn, Y_wn)
    inter1 = reg1.intercept_
    inter2 = reg2.intercept_
    coef = reg1.coef_
    coef_wn = reg2.coef_
    # print(coef)
    # print(coef_wn)
    Y_predict = np.matmul(X, coef) + inter1
    Y_predict = np.array(Y_predict)
    Y_predict_wn = np.matmul(X, coef_wn) + inter2
    Y_predict_wn = np.array(Y_predict_wn)
    return mse(Y_predict, Y), mse(Y_predict_wn, Y)

示例#9

0

显示文件

def scan_step(to_scan,n,visual=False,f=False):
    # scnes through one state variable with n points scan variable given by to_scan = [0,3]
    # plots the next state vector


    system = CartPole(visual)
    initial_state = np.zeros(4)
    initial_state = random_init(initial_state,f=f)

    initial_state[to_scan] = 0 - ranges[to_scan]/2   ##makes it scan from the start of a variables range
    scanned_values = np.zeros(n)
    reached_states = np.zeros((n,4))
    step = ranges[to_scan]/n
    for i in range(n): ##number of scanning variables
        x = initial_state.copy() ##takes a copy of this state
        x[to_scan] = x[to_scan]+ i * step ##changes one state variable
        system.setState(x)
        if to_scan == 2:
            system.remap_angle()              ##remaps the angle
            x[to_scan] = remap_angle(x[to_scan])
        scanned_values[i] = x[to_scan]

        system.performAction(x[4])
        reached_states[i,:] = system.getState()

    plt.plot(scanned_values, reached_states[:, 0], Label='Position')
    plt.plot(scanned_values, reached_states[:, 1], Label='Velocity')
    plt.plot(scanned_values, reached_states[:, 2], Label='Angle')
    plt.plot(scanned_values, reached_states[:, 3], Label='Angular Velocity')


    plt.xlabel(labels[to_scan])
    plt.ylabel('Reached state')
    plt.legend(loc='upper left')
    plt.title(
            'Initial state: ' + str(np.round(initial_state,2)) + ' Scan through ' + labels[to_scan] )
    plt.show()

示例#10

0

显示文件

文件： task2_4.py 项目： Joycy-xh297/inverted_pendulum

def modelL(p):
    max_t = 5
    state1 = np.array([0, 0, 0.1, 0, 0])
    state2 = np.array([0, 0, 0.3, 0, 0])
    init_state = state1
    cartpole = CartPole()
    steps = int(max_t / cartpole.delta_time)  # 0.2s per step
    Xn = init_state
    Xn_new = Xn
    L_model = 0
    for i in range(steps):
        Xn = Xn_new
        # change the action term according to the policy
        Xn[-1] = np.dot(p, Xn[:-1])
        Xn = Xn.reshape(1, Xn.shape[0])
        Yn = predict(Xn, XM, sigma, alpha)
        Yn.resize(Xn.shape)
        Xn_new = Xn + Yn
        Xn_new = np.array(Xn_new[0])
        Xn_new[2] = remap_angle(Xn_new[2])
        L_model += loss(Xn_new)
    return L_model

示例#11

0

显示文件

def non_linear_loss_function(p):
    model = non_linear_model(10, f=True)
    #model.read_from_file('m=500n=10000')
    model.read_from_file('m=1000n=10000')

    policy = non_linear_model(10, policy=True)
    #policy.read_from_file('nonzero')  ### change at somepoint so that sd's are part of the p input?
    #policy.read_from_file('justtop')
    policy.read_from_file('wholetest')
    # policy.read_from_file('flickup')

    #different optmisation situations
    optimise_loc = False
    sym = False
    top_constant = False
    bot_constant = False
    bounce_back = True

    if optimise_loc == True:
        m = int((p.shape[0] - 2) / 4)  #policy.m
        #policy = non_linear_model(m,policy=True)
        #policy.sd = np.ones((4,m))
        policy.basis[:, 0] = p[:4].T
        policy.basis[:, 1] = -policy.basis[:, 0]
        policy.basis[:, 2] = p[4:4 + 4].T
        policy.basis[:, 3] = -policy.basis[:, 2]
        p = [-27.669, 27.669, -10.98, 10.98]
        policy.sd[0, :] = [30, 30, 30, 30]
        policy.sd[1, :] = [30, 30, 30, 30]

    if sym:
        policy.basis[:, 0] = p[2:6].T
        policy.basis[:, 1] = -policy.basis[:, 0]
        policy.basis[:, 2] = p[6:10].T
        policy.basis[:, 3] = -policy.basis[:, 2]
        #print(policy.basis)
        p = [p[0], -p[0], p[1], -p[1]]

    if top_constant:
        p = np.array([-27.669, 27.669, -10.98, 10.98, p[0], p[1], 0,
                      p[2]])  #for updating 3 on lhs

    if bot_constant:
        p = [p[0], -p[0], p[1], -p[1], 76, -77, 0, -37.025]

    if bounce_back:
        p = np.array([
            -27.669, 27.669, -10.98, 10.98, 76, -77, p[0], -37.025, -p[0],
            p[1], -p[1]
        ])

    loss = 0
    #initial=np.array([0,0,np.pi,0])
    #initial = np.array([0, 0.2, 0.05, -0.2])*5
    initial = np.array([-5.5, -3.36, -0.56, 0.42])
    n = 30
    loss = loss + loss_pos(initial)
    x = np.zeros((4, 1))
    x[:, 0] = initial[0:4]
    x_extended = np.zeros((5))

    system = CartPole(visual=False)
    system.setState(initial)

    for i in range(n):
        # # using non linear modeelling for training
        change = np.zeros((1, 4))
        x_policy = policy.transform_x(x)
        if isinstance(p, (list, tuple, np.ndarray)):
            force = np.matmul(p, x_policy)
        else:
            force = np.matmul(p._value, x_policy)

        x_extended[0:4] = x[:, 0]
        x_extended[4] = force
        test = np.matmul(model.alpha.T, model.transform_x(x_extended))
        x = x + test
        x[2] = remap_angle(x[2])

        # x = system.getState()
        # x_policy = policy.transform_x(x)
        # if isinstance(p, (list, tuple, np.ndarray)):
        #       force = np.matmul(p,x_policy)
        # else:
        #       force = np.matmul(p._value, x_policy)
        #
        # #force = np.matmul(p, x)
        # system.performAction(force)
        # x= system.getState()

        loss += loss_pos(x)
    return loss

示例#12

0

显示文件

def test_policy(p,
                n,
                visible=False,
                sd=0.03,
                bottom=False,
                nlm=None,
                Top=False):
    system = CartPole(visual=visible)
    #system.setState(np.array([0,0.01,-0.01,-0.01]))
    system.setState(
        np.array([
            random.normalvariate(0, sd),
            random.normalvariate(0, sd),
            random.normalvariate(0, sd),
            random.normalvariate(0, sd)
        ]))
    if nlm != None and Top == False: bottom = True
    if bottom:
        system.setState(np.array([0, 0, np.pi, 0]))
        #sd=1
        #system.setState(np.array([random.normalvariate(0, sd), random.normalvariate(0, sd), np.pi ,random.normalvariate(0, sd)]))
        #system.setState(np.array([-5.5,-3.36,-0.56,0.42]))

    state_history = np.empty((n, 4))
    #system.setState(np.array([0, 0, 0,5.1]))
    #system.setState(np.array([0, 0.2, 0.05, -0.2])*5)
    # system.setState(np.array([0, 0.2, 1, -4]) ) #get it to 1 rad at -4rads^1 and itll settle
    for i in range(n):
        x = system.getState()
        if x.ndim == 1:
            state_history[i, :] = x
        else:
            state_history[i, :] = x.reshape((4))

        if nlm == None:
            force = np.matmul(p, x)
        else:
            x_ext = nlm.transform_x(x)
            force = np.matmul(p, x_ext)
            #state_history[i,2]=force

        system.performAction(force)
        system.remap_angle()

    plt.plot(state_history[:, 2], label=r'$\theta$')
    plt.plot(state_history[:, 1], label='v')
    plt.plot(state_history[:, 3], label=r'$\dot{\theta}$')
    plt.plot(state_history[:, 0], label='x')
    plt.xlabel('Iteration')
    #plt.ylabel(r'$\theta$')
    plt.legend(loc='lower left')
    plt.title('P=' + str(p))
    #plt.ylim([-0.2,0.2])
    plt.show()

示例#13

0

显示文件

def loss_trajectory(initial, n, p, model=None, visual=False, nlp=None):
    loss = 0
    if model == None:
        system = CartPole(visual)
        system.setState(initial)
        loss += system.loss()
    else:
        loss += loss_pos(initial)
        x = initial[0:4]
        x_extended = np.zeros(5)

    for i in range(n):
        if model == None:  #using model dynamics for initial scans
            x = system.getState()
            force = np.matmul(p, x)
            system.performAction(force)
            system.remap_angle()
            loss += system.loss()
        else:  # using non linear modeelling for training
            if nlp == None:
                force = np.matmul(p, x)
            else:
                force = np.matmul(p, nlp.transform_x(x))
            x_extended[0:4] = x
            x_extended[4] = force
            change = np.matmul(model.alpha.T, model.transform_x(x_extended)).T
            x += change
            x[2] = remap_angle(x[2])
            loss += loss_pos(x)
    return loss

示例#14

0

显示文件

文件： task1.py 项目： Joycy-xh297/inverted_pendulum

from CartPole import *
import numpy as np
import matplotlib.pyplot as plt
"""task1.1 rollout simulation"""
cartpole1 = CartPole()
cartpole1.delta_time = 0.05
max_t = 5.0  # terminate time for the simulation
n = int(max_t / cartpole1.delta_time)  # number of iterations for performAction
state1 = [0, 1, np.pi, 0]  # nonzero cart velocity
state2 = [0, 0, np.pi, -14.7]  # nonzero angular velocity
init_state = state1
cartpole1.setState(state=init_state)
# list of variables
x, x_dot, theta, theta_dot = [], [], [], []
t = np.arange(0, max_t, cartpole1.delta_time)
# simulation
for i in range(n):
    cartpole1.performAction()
    # cartpole1.remap_angle()
    current_state = cartpole1.getState()
    x.append(current_state[0])
    x_dot.append(current_state[1])
    theta.append(current_state[2])
    theta_dot.append(current_state[3])
# plotting the results
fig, axs = plt.subplots(2, 2, figsize=(9, 16), constrained_layout=True)
axs[0, 0].plot(t, x)
axs[0, 0].set_title('cart_location')
axs[0, 0].set_xlabel('time (s)')
axs[0, 0].set_ylabel('location (m)')
axs[0, 1].plot(t, x_dot)

示例#15

0

显示文件

文件： task2_4.py 项目： Joycy-xh297/inverted_pendulum

        # change the action term according to the policy
        cartpole.setState(Xn[:4])
        action = np.dot(p, Xn)
        cartpole.performAction(action)
        cartpole.remap_angle()
        Xn_new = cartpole.getState()
        X_cartpole.append(np.array(Xn_new))
        L_model += loss(Xn_new)
    X_cartpole = np.array(X_cartpole)
    return X_cartpole[:-1], L_model


# N = 1000 # NO of datapoints
# M = 1000 # NO of data locations for basis function
# lam = 10**(-4) # variance of data noise
cartpole1 = CartPole()
# X = []
# Y = []
# for i in range(N):
#     x = random.uniform(-5,5)
#     x_dot = random.uniform(-10,10)
#     theta = random.uniform(-np.pi,np.pi)
#     theta_dot = random.uniform(-15,15)
#     act = random.uniform(-10,10)
#     Xn = np.array([x,x_dot,theta,theta_dot,act])
#     X.append(Xn)
#     cartpole1.setState(Xn[:-1])
#     cartpole1.performAction(action=Xn[-1])
#     Xn_1 = np.array(cartpole1.getState())
#     Y.append(Xn_1-Xn[:-1])
# X = np.array(X)

示例#16

0

显示文件

def test_force_start(f=0,n=30, all=False):
    system = CartPole(visual=False)
    system.setState([0, 0, np.pi, 0])  ## initialise
    state_history = np.empty((n + 1, 4,n))
    for i in range(n):
        state_history[0, :,i] = [0, 0, np.pi,0]
    time = np.arange(0, (n + 0.5) * 0.1, 0.1)
    for fn in range(n):
        system.setState([0, 0, np.pi, 0])
        for i in range(n):  # update dynamics n times
            if i < fn+1:
                system.performAction(f)  ## runs for 0.1 secs with no
            else:
                system.performAction(0)
            system.remap_angle()
            state_history[i + 1, :,fn] = system.getState()#
        if fn%3==0:
            plt.plot(time, state_history[:, 2,fn], Label=str(fn+1) + ' Steps')
            if all:
                plt.plot(time, state_history[:, 0, fn], Label=str(fn + 1) + ' Steps-x')
                plt.plot(time, state_history[:, 1, fn], Label=str(fn + 1) + ' Steps-v')
                plt.plot(time, state_history[:, 3, fn], Label=str(fn + 1) + r' Steps-$\dot{\theta}$')
    plt.xlabel('Time')
    plt.ylabel(r'$\theta$')
    plt.legend(loc='lower left')
    plt.title('Constant force effect for start')
    plt.show()

示例#17

0

显示文件

def predict(X,XM,sigma,alpha):
    K_MN = kernel(X,XM,sigma)
    return np.matmul(K_MN,alpha)

def l(X,sigma):
    """X: state vector"""
    sum = 0
    for i,x in enumerate(X):
        sum += -0.5*np.linalg.norm(x)**2/sigma[i]**2
    return 1.0-np.exp(sum)
        

N = 1000 # NO of datapoints
M = 800 # NO of data locations for basis function
lam = 10**(-4) # variance of data noise
cartpole1 = CartPole()

# added from 2.2
X = []
Y = []
for i in range(N):
    x = random.uniform(-5,5)
    x_dot = random.uniform(-10,10)
    theta = random.uniform(-np.pi,np.pi)
    theta_dot = random.uniform(-15,15)
    act = random.uniform(-20,20)
    Xn = np.array([x,x_dot,theta,theta_dot,act])
    X.append(Xn)
    cartpole1.setState(Xn[:-1])
    cartpole1.performAction(action=Xn[-1])
    Xn_1 = np.array(cartpole1.getState())

示例#18

0

显示文件

文件： task1_22.py 项目： Joycy-xh297/inverted_pendulum

import numpy as np
import matplotlib.tri as tri
from CartPole import *

# task 1.2 changes of state: model Y = X(T) - X(0)
"""initialisation"""
cartpole1 = CartPole()

# scans of variables
"""setting the state scans"""
x_scan = np.linspace(-5, 5, 100)
x_dot_scan = np.linspace(-10, 10, 100)
theta_scan = np.linspace(np.pi - 0.5, np.pi, 100)
theta_dot_scan = np.linspace(10, 15, 100)

# """Y plot for x_dot scan"""
# Y0 = []
# for x in x_scan:
#     X = np.array([x,0,np.pi,0])
#     cartpole1.setState(X)
#     cartpole1.performAction()
#     Y = np.array(cartpole1.getState())
#     Y0.append((Y-X))
# Y0 = np.array(Y0)

# """Y plot for x_dot scan"""
# Y1 = []
# for x_dot in x_dot_scan:
#     X = np.array([0,x_dot,np.pi,0])
#     cartpole1.setState(X)
#     cartpole1.performAction()

示例#19

0

显示文件

import numpy as np
import numpy.random as random
from sklearn import linear_model
from CartPole import *
# from task1_3 import *

# linear model Y=CX, X(n+1)=X(n)+CX(n)

cartpole1 = CartPole()
"""generating random states X and corresponding Y"""
# X = []
# Y = []
# for i in range(1000):
#     x = random.uniform([-5,5,1])[0]
#     x_dot = random.uniform([-10,10,1])[0]
#     theta = random.uniform([-np.pi,np.pi,1])[0]
#     theta_dot = random.uniform([-15,15,1])[0]
#     Xn = np.array([x,x_dot,theta,theta_dot])
#     X.append(Xn)
#     cartpole1.setState(Xn)
#     cartpole1.performAction()
#     Xn_1 = np.array(cartpole1.getState())
#     Y.append(Xn_1-Xn)
# X = np.array(X)
# Y = np.array(Y)

# """performing linear regression"""
# model = linear_model.LinearRegression()
# model.fit(X,Y)

# change the time step

示例#20

0

显示文件

def rollout (initial_state, n = 100, visual =False,f=0,fn=0):
    ## takes an intitial state array for the initial cart and angular velocity,
    ## itialises at stable equilibrium theta = pi , x =0
    ## runs for n iterations of euler dynamics each iteration is 0.1 seconds
    ## no applied force
    ## plots state variables

    system = CartPole(visual)
    system.setState([0, initial_state[0], np.pi, initial_state[1]])  ## initialise
    state_history= np.empty((n+1,4))
    state_history[0,:] = [0, initial_state[0], np.pi, initial_state[1]]
    for i in range(n): #update dynamics n times
        if i<fn:
            system.performAction(f)  ## runs for 0.1 secs with no
        else: system.performAction(0)
        system.remap_angle()
        state_history[i+1,:] = system.getState()
    time=np.arange(0,(n+0.5)*0.1,0.1)


    plt.plot(time,state_history[:,0], Label = 'Position')
    plt.plot(time,state_history[:,1], Label = 'Velocity')
    plt.plot(time,state_history[:,2], Label = 'Angular Position')
    plt.plot(time,state_history[:,3], Label = 'Angular Velocity')
    plt.xlabel('Time')
    plt.legend(loc='upper right')
    plt.title('Initial Velocity : '+ str(initial_state[0])+ ' , Initial Angular velocity : '+ str(initial_state[1]))
    plt.show()

示例#21

0

显示文件

def contour_plot(n,x_var, y_var , cont, visual = False , model = None,error=False,f=False):
    # plots a 3d surface plot and contoru plot where x_var , Y_var are scanned through the suitable range
    # cont is the variable to be plotted on the contours
    #n is the number of points to evaluate at in the range
    # x_var,y_var and cont are either 0,1,2,3
    v=4
    if f:
        v=5
    system = CartPole(visual)
    initial_state = np.zeros(v)
    initial_state = random_init(initial_state,f=f)
    force=0
    if f:
        force = initial_state[4]

    scanned_x = np.arange( - ranges[x_var]/2, ranges[x_var]/2, ranges[x_var]/n )  ##scanning variable 1yield
    scanned_y = np.arange( - ranges[y_var]/2, ranges[y_var]/2, ranges[y_var]/n )  ##scanning variable 1yield

    X, Y = np.meshgrid(scanned_x, scanned_y)

    rav_X=np.ravel(X)
    rav_Y=np.ravel(Y)
    rav_Z=np.zeros(rav_X.shape[0])
    for i in range(rav_X.shape[0]):
        x = initial_state.copy()
        x[x_var] = rav_X[i]
        x[y_var] = rav_Y[i]

        system.setState(x)

        system.performAction(force)
        rav_Z[i] = system.getState()[cont]

        if error ==True:
            change = np.zeros((1,v))
            change[0,0:4] = np.matmul(model.alpha.T, model.transform_x(x)).T

            next_state = x + change

            rav_Z[i] = abs(next_state[0,cont] - rav_Z[i])

    Z = rav_Z.reshape(X.shape)

    colour = 'inferno'
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1 , cmap=colour)
    # Add a color bar which maps values to colors.
    cbar =fig.colorbar(surf, shrink=0.5, aspect=5)
    cbar.set_label(labels[cont])
    plt.xlabel(labels[x_var])
    plt.ylabel(labels[y_var])
    ax.set_zlabel(labels[cont])
    plt.title('Initial state: ' + str(np.round(initial_state,2)) + '\n Scan through ' + labels[x_var]+ ' and ' + labels[y_var] )

    if error == True:
        ax.set_zlabel('Error in ' + labels[cont])
        plt.scatter(model.basis[x_var,:],model.basis[y_var,:],np.ones(model.basis[x_var,:].shape[0])*0.3)
    else:
        cont_plot = plt.tricontourf(rav_X, rav_Y, rav_Z, levels=14, cmap=colour, offset = np.amin(rav_Z))

    plt.show()

    cont_plot = plt.tricontourf(rav_X,rav_Y,rav_Z,levels=14, cmap=colour)
    cbar = plt.colorbar(cont_plot,shrink=0.5, aspect=5)
    cbar.set_label(labels[cont])
    plt.xlabel(labels[x_var])
    plt.ylabel(labels[y_var])
    plt.title(
        'Initial state: ' + str(np.round(initial_state, 2)) + '\n Scan through ' + labels[x_var] + ' and ' + labels[
            y_var])

    plt.show()

示例#22

0

显示文件

def scan_all(n,type=0,visual=False, c =None,f=False,nlm=False):  ##type 0 is a regular scan, type 1 is a change in variable scan
    ## c iks a 4x4 linear coefficent matrix for plotting model scans
    # scans through all state variables and plots either the next state or the change in state

    system = CartPole(visual)
    if f:
        initial_state = np.zeros(5)
    else:
        initial_state = np.zeros(4)
    initial_state = random_init(initial_state,f=f)
    line_lables=[None,None,None,None,None,None,None,None]
    fig = plt.figure()
    for to_scan in range(4):
        plot_no=to_scan
        scanned_state = np.zeros((4,n))
        if nlm:
            if c.v==5:
                to_scan+=1
                scanned_state = np.zeros((5, n))
        initial_state[to_scan] = 0 - ranges[to_scan]/2   ##makes it scan from the start of a variables range
        scanned_values = np.zeros(n)
        reached_states = np.zeros((n,4))

        Y = np.zeros((n,4))
        step = ranges[to_scan]/n
        for i in range(n): ##number of scanning variables
            x = initial_state.copy() ##takes a copy of this state
            x[to_scan] = x[to_scan]+ i * step ##changes one state variable
            system.setState(x)
            if to_scan == 2:
                system.remap_angle()              ##remaps the angle
                x[to_scan] = remap_angle(x[to_scan])
            scanned_values[i] = x[to_scan]
            scanned_state[:,i] = x

            if f:
                system.performAction(x[4])
            else:
                system.performAction(0)
            reached_states[i,:] = system.getState()
            Y[i,:] = reached_states[i,:] - x[:4]

        fig.add_subplot(2,2,plot_no+1)
        plt.subplots_adjust(hspace=0.3)
        if plot_no == 3: #add labels
            line_lables=['x','v',r"$\theta$",r"$\dot{\theta}$",'Predicted x','Predicted v',r"Predicted $\theta$",r" Predicted $\dot{\theta}$"]

        if type == 0 : #plot regular scan
            plt.plot(scanned_values, reached_states[:, 0], Label=line_lables[0])
            plt.plot(scanned_values, reached_states[:, 1], Label=line_lables[1])
            plt.plot(scanned_values, reached_states[:, 2], Label=line_lables[2])
            plt.plot(scanned_values, reached_states[:, 3], Label=line_lables[3])
            plt.ylabel('Reached state')

        elif type ==1: #plot change
            if np.any(c)!=None:
                if nlm:
                    scanned_state=c.transform_x(scanned_state)
                    predictions = np.matmul(c.alpha.T,scanned_state)
                else:
                    predictions = np.matmul(c,scanned_state)

                plt.plot(scanned_values[:], predictions[0, :],linestyle=':',color= 'b', Label=line_lables[4])
                plt.plot(scanned_values[:], predictions[1, :],linestyle=':',color= 'orange', Label=line_lables[5])
                plt.plot(scanned_values[:], predictions[2, :],linestyle=':',color= 'g', Label=line_lables[6])
                plt.plot(scanned_values[:], predictions[3, :],linestyle=':',color= 'r', Label=line_lables[7])

            plt.plot(scanned_values[:], Y[:, 0],color= 'b', Label=line_lables[0])
            plt.plot(scanned_values[:], Y[:, 1],color= 'orange', Label=line_lables[1])
            plt.plot(scanned_values[:], Y[:, 2],color= 'g', Label=line_lables[2])
            plt.plot(scanned_values[:], Y[:, 3],color= 'r', Label=line_lables[3])
            plt.ylabel('Change in State Variable')


        labels = [ "Position" , "Velocity" , "Angle" , "Angular Velocity", "Force"]
        plt.xlabel(labels[to_scan])
        #plt.legend(loc='upper left')
        plt.title(
                'Initial state: ' + str(np.round(initial_state,2)) + ' Scan through ' + labels[to_scan] )
    fig.legend(loc='lower center', ncol=4)
    plt.show()

示例#23

0

显示文件

文件： task2_3.py 项目： Joycy-xh297/inverted_pendulum

    return np.matmul(K_MN, alpha)


def l(X, sigma):
    """X: state vector"""
    sum = 0
    for i, x in enumerate(X):
        sum += -0.5 * np.linalg.norm(x)**2 / sigma[i]**2
    return 1.0 - np.exp(sum)


# generating dataset
N = 1000  # NO of datapoints
M = 640  # NO of data locations for basis function
lam = 10**(-4)  # variance of data noise
cartpole1 = CartPole()
X = []
Y = []
for i in range(N):
    x = random.uniform(-5, 5)
    x_dot = random.uniform(-10, 10)
    theta = random.uniform(-np.pi, np.pi)
    theta_dot = random.uniform(-15, 15)
    act = random.uniform(-20, 20)
    Xn = np.array([x, x_dot, theta, theta_dot, act])
    X.append(Xn)
    cartpole1.setState(Xn[:-1])
    cartpole1.performAction(action=Xn[-1])
    Xn_1 = np.array(cartpole1.getState())
    Y.append(Xn_1 - Xn[:-1])
X = np.array(X)

示例#24

0

显示文件

import CartPole

CartPole.CartPole(Train=False,
                  epoch=1000,
                  game_step=1000,
                  gradient_update=10,
                  learning_rate=0.01,
                  discount_factor=0.95,
                  save_weight=100,
                  save_path="CartPole",
                  rendering=False).Graph()

示例#25

0

显示文件

from CartPole import *

# linear model Y=CX


def lin_reg(X, Y):
    X = np.matrix(X)
    XT = np.matrix.transpose(X)
    Y = np.matrix(Y)
    XT_X = np.matmul(XT, X)
    XT_Y = np.matmul(XT, Y)
    betas = np.matmul(np.linalg.inv(XT_X), XT_Y)
    return betas


cartpole1 = CartPole()
"""generating random states X and corresponding Y"""
X = []
Y = []
N = 1000  # no of datapoints
for i in range(N):
    x = random.uniform([-5, 5, 1])[0]
    x_dot = random.uniform([-10, 10, 1])[0]
    theta = random.uniform([-np.pi, np.pi, 1])[0]
    theta_dot = random.uniform([-15, 15, 1])[0]
    Xn = np.array([x, x_dot, theta, theta_dot])
    X.append(Xn)
    cartpole1.setState(Xn)
    cartpole1.performAction()
    cartpole1.remap_angle()
    Xn_1 = np.array(cartpole1.getState())

示例#26

0

显示文件

os.environ["CUDA_VISIBLE_DEVICES"] = '0'

# 현재 사용하고 있는 GPU 번호를 얻기 위한 코드 - 여러개의 GPU를 쓸 경우 정보 확인을 위해!
print("<<< Ubuntu Terminal 창에서 지정해준 경우, 무조건 GPU : 1대, GPU 번호 : 0 라고 출력 됨 >>>")
local_device_protos = device_lib.list_local_devices()
GPU_List = [x.name for x in local_device_protos if x.device_type == 'GPU']
# gpu_number_list = []
print("<<< # 사용 가능한 GPU : {} 대 >>>".format(len(GPU_List)))
print("<<< # 사용 가능한 GPU 번호 : >>>", end="")
for i, GL in enumerate(GPU_List):
    num = GL.split(":")[-1]
    # gpu_number_list.append(num)
    if len(GPU_List) - 1 == i:
        print(" " + num)
    else:
        print(" " + num + ",", end="")

CP = CartPole(model_name="CartPole",
              epoch=1000,
              gradient_update=10,
              learning_rate=0.01,
              training_display=True,
              SaveGameMovie=True,
              discount_factor=0.95,
              save_weight=100,
              save_path="CartPole",
              only_draw_graph=False)

#CP.train #학습
CP.test  # 테스트

示例#27

0

显示文件

import numpy as np
from CartPole import *

# task 1.2 changes of state

"""initialisation"""
cartpole1 = CartPole()

"""setting the state scans"""
x_scan = np.linspace(-5,5,100)
x_dot_scan = np.linspace(-10,10,100)
theta_scan = np.linspace(np.pi-0.5,np.pi,100)
theta_dot_scan = np.linspace(10,15,100)

"""Y plot for x scan"""
Y0 = []
for x in x_scan:
    X = [x,2,np.pi-1,5]
    cartpole1.setState(X)
    cartpole1.performAction()
    Y = cartpole1.getState()
    Y0.append(Y)
Y0 = np.array(Y0)

"""Y plot for x_dot scan"""
Y1 = []
for x_dot in x_dot_scan:
    X = [0,x_dot,np.pi-1,5]
    cartpole1.setState(X)
    cartpole1.performAction()
    Y = cartpole1.getState()

Python CartPole, cs229-solutions-2020示例