Example #1
def test_cascade():
    np.random.seed(0)
    d = 2  # State dimension
    k = 1  # Controller's output dimension
    horizon = 10
    e = np.array([[10.0]])   # Maximum control input; setting it too low can lead to Cholesky failures.

    # Training Dataset
    X0 = np.random.rand(100, d + k)
    A = np.random.rand(d + k, d)
    Y0 = np.sin(X0).dot(A) + 1e-3*(np.random.rand(100, d) - 0.5)  #  Just something smooth
    pilco = PILCO(X0, Y0)
    pilco.controller.max_action = e
    pilco.optimize()

    # Generate input
    m = np.random.rand(1, d)  # Row vector here; the MATLAB implementation uses the column vector m'
    s = np.random.rand(d, d)
    s = s.dot(s.T)  # Make s positive semidefinite

    M, S, reward = predict_wrapper(pilco, m, s, horizon)

    # convert data to the struct expected by the MATLAB implementation
    policy = oct2py.io.Struct()
    policy.p = oct2py.io.Struct()
    policy.p.w = pilco.controller.W.value
    policy.p.b = pilco.controller.b.value.T
    policy.maxU = e

    # Extract the GP hyperparameters in the format expected by the MATLAB implementation
    lengthscales = np.stack([model.kern.lengthscales.value for model in pilco.mgpr.models])
    variance = np.stack([model.kern.variance.value for model in pilco.mgpr.models])
    noise = np.stack([model.likelihood.variance.value for model in pilco.mgpr.models])

    # GPML convention: one column per output dimension, rows are
    # [log lengthscales; log signal std; log noise std]
    hyp = np.log(np.hstack(
        (lengthscales,
         np.sqrt(variance[:, None]),
         np.sqrt(noise[:, None]))
    )).T

    dynmodel = oct2py.io.Struct()
    dynmodel.hyp = hyp
    dynmodel.inputs = X0
    dynmodel.targets = Y0

    plant = oct2py.io.Struct()
    plant.angi = np.zeros(0)
    plant.poli = np.arange(d) + 1
    plant.dyni = np.arange(d) + 1
    plant.difi = np.arange(d) + 1

    # Call function in octave
    M_mat, S_mat = octave.pred(policy, plant, dynmodel, m.T, s, horizon, nout=2, verbose=True)
    # Extract only last element of the horizon
    M_mat = M_mat[:,-1]
    S_mat = S_mat[:,:,-1]

    np.testing.assert_allclose(M[0], M_mat.T, rtol=1e-4)
    np.testing.assert_allclose(S, S_mat, rtol=1e-4)
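
The helper `predict_wrapper` is not defined above. A minimal sketch of what it might look like, assuming the Python PILCO object exposes a `predict(m, s, horizon)` method that propagates the state distribution and returns the final mean, covariance, and accumulated reward (an assumption about the API under test):

# Hypothetical helper; assumes pilco.predict(m, s, horizon) exists and
# returns (final state mean, final state covariance, accumulated reward).
def predict_wrapper(pilco, m, s, horizon):
    return pilco.predict(m, s, horizon)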
Example #2
def load_pilco(path, sparse=False):
    X = np.loadtxt(path + 'X.csv', delimiter=',')
    Y = np.loadtxt(path + 'Y.csv', delimiter=',')
    if not sparse:
        pilco = PILCO(X, Y)
    else:
        with open(path + 'n_ind.txt', 'r') as f:
            n_ind = int(f.readline())
        pilco = PILCO(X, Y, num_induced_points=n_ind)
    # The .npy files store parameter dicts, so pickle loading must be enabled
    params = np.load(path + "pilco_values.npy", allow_pickle=True).item()
    pilco.assign(params)
    for i, m in enumerate(pilco.mgpr.models):
        values = np.load(path + "model_" + str(i) + ".npy", allow_pickle=True).item()
        m.assign(values)
    return pilco
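
For context, a counterpart that writes the files this loader expects might look like the sketch below; `pilco.read_values()`, `m.read_values()`, and `pilco.mgpr.num_induced_points` are assumptions based on the GPflow 1.x-style API used above:

# Hypothetical saver mirroring load_pilco; the read_values() calls and the
# num_induced_points attribute are assumptions, not a confirmed API.
def save_pilco(path, X, Y, pilco, sparse=False):
    np.savetxt(path + 'X.csv', X, delimiter=',')
    np.savetxt(path + 'Y.csv', Y, delimiter=',')
    if sparse:
        with open(path + 'n_ind.txt', 'w') as f:
            f.write('%d' % pilco.mgpr.num_induced_points)
    np.save(path + 'pilco_values.npy', pilco.read_values())
    for i, m in enumerate(pilco.mgpr.models):
        np.save(path + 'model_' + str(i) + '.npy', m.read_values())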
Example #3
def load_pilco(path, controller=None, reward=None, sparse=False):
    # np.loadtxt has no allow_pickle argument; it belongs on np.load below
    X = np.loadtxt(path + 'X.csv', delimiter=',')
    Y = np.loadtxt(path + 'Y.csv', delimiter=',')
    if not sparse:
        pilco = PILCO((X, Y), controller=controller, reward=reward)
    else:
        with open(path + 'n_ind.txt', 'r') as f:
            n_ind = int(f.readline())
        pilco = PILCO((X, Y), num_induced_points=n_ind, controller=controller, reward=reward)
    # The .npy files store parameter dicts, so pickle loading must be enabled
    params = np.load(path + "pilco_values.npy", allow_pickle=True).item()
    pilco.assign(params)
    for i, m in enumerate(pilco.mgpr.models):
        values = np.load(path + "model_" + str(i) + ".npy", allow_pickle=True).item()
        m.assign(values)
    return pilco
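
Illustrative usage, assuming the saved run lives under `saved/` and that the controller and reward are reconstructed with the same shapes used at save time (the constructor names and arguments below are assumptions):

# Hypothetical call site; the RbfController/ExponentialReward signatures are
# assumptions and must match whatever was used when the model was saved.
controller = RbfController(state_dim=state_dim, control_dim=control_dim,
                           num_basis_functions=10)
reward = ExponentialReward(state_dim=state_dim)
pilco = load_pilco('saved/', controller=controller, reward=reward)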
Example #4
    Y = np.vstack((Y, Y_))

state_dim = Y.shape[1]
control_dim = X.shape[1] - state_dim
policy = RBFNPolicy(state_dim,
                    control_dim=control_dim,
                    num_basis_fun=50,
                    max_action=env.action_space.high[0])

a = 0.25
l = 0.6
j_target, iT = cost(a, l)  # note: the `cost` helper is shadowed by the object below
idxs = [0, 3, 4]
cost = SaturatingCost(j_target, iT, idxs)

pilco = PILCO(X, Y, policy=policy, cost=cost, horizon=40)

pilco.optimize()

# pilco.predict()

# rollout(policy=pilco_policy, timesteps=100)

# print(Y)
# predictions = pilco.policy.predict(X[0, :])
# print(predictions)

# for i_episode in range(20):
#     pilco.optimize()
#     X_new, Y_new = rollout(policy=pilco_policy, timesteps=100)
#     X_new = X_new.reshape((X_new.shape[0], X_new.shape[2]))
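
The commented-out lines above sketch the intended training cycle; a hedged completion, assuming `rollout` and `pilco_policy` behave as in the surrounding examples and that the dynamics dataset is refreshed the same way as in Example #5:

# Hypothetical completion of the commented-out loop; rollout and pilco_policy
# are assumed to exist as in the neighbouring examples.
for i_episode in range(20):
    pilco.optimize()                                    # re-fit dynamics, improve policy
    X_new, Y_new = rollout(policy=pilco_policy, timesteps=100)
    X = np.vstack((X, X_new))                           # grow the dataset
    Y = np.vstack((Y, Y_new))
    pilco.dynamics_model.set_XY(X, Y)                   # refresh the GP training data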
Example #5
policy = RBFNPolicy(state_dim,
                    control_dim=control_dim,
                    num_basis_fun=10,
                    max_action=env.action_space.high[0])

a = 0.25
l = 0.6
C = np.array([[1., l, 0.0], [0., 0., l]])
iT = a**(-2) * np.dot(C.T, C)

cost = SaturatingCost(state_dim=3, state_idxs=[0, 3, 4], W=iT, t=[0., 0., -1.])
# print(state_dim)
# cost = SaturatingCost(state_dim=state_dim)
# cost = SaturatingCost(
#     x_target=[0., 0., -1.], iT=iT, state_dim=3, state_idxs=[0, 3, 4])
# cost = ExponentialReward(state_dim)

pilco = PILCO(x, y, policy=policy, cost=cost, horizon=40)  # pass the policy instance, not the class

for rollouts in range(15):
    # Learn dynamics model & use it to simulate/optimise policy
    pilco.optimize()

    # Execute policy on environment
    x_new, y_new = rollout(policy=pilco_policy, timesteps=100)

    # Update dataset
    x = np.vstack((x, x_new))
    y = np.vstack((y, y_new))
    pilco.dynamics_model.set_XY(x, y)
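
The `rollout` helper used throughout is not shown; a minimal Gym-style sketch, assuming a global `env` with the classic step API and a `policy` callable that maps an observation to an action (both assumptions):

# Hypothetical rollout; assumes env.step returns (obs, reward, done, info)
# and that the policy callable maps an observation to a control action.
def rollout(policy, timesteps):
    X, Y = [], []
    obs = env.reset()
    for _ in range(timesteps):
        action = policy(obs)
        obs_new, reward, done, _ = env.step(action)
        X.append(np.hstack((obs, action)))  # GP input: state + control
        Y.append(obs_new - obs)             # GP target: state difference
        obs = obs_new
        if done:
            break
    return np.stack(X), np.stack(Y)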