def load_pilco(path, sparse=False):
    """Rebuild a PILCO object from files stored under the prefix ``path``.

    Files read (``path`` is prepended verbatim, so include a trailing
    separator when it names a directory):

    * ``X.csv`` / ``Y.csv`` -- comma-separated data passed to ``PILCO``.
    * ``n_ind.txt`` -- only when ``sparse`` is true; its first line is parsed
      as the integer passed as ``num_induced_points``.
    * ``pilco_values.npy`` -- object handed to ``pilco.assign``.
    * ``model_<i>.npy`` -- object handed to ``assign`` of the i-th entry of
      ``pilco.mgpr.models``.

    Args:
        path: String prefix of the saved files.
        sparse: If true, construct PILCO with ``num_induced_points`` read
            from ``n_ind.txt``.

    Returns:
        The constructed PILCO instance with the stored values assigned.
    """
    X = np.loadtxt(path + 'X.csv', delimiter=',')
    Y = np.loadtxt(path + 'Y.csv', delimiter=',')

    if not sparse:
        pilco = PILCO(X, Y)
    else:
        # The context manager closes the file; no explicit close() is needed.
        with open(path + 'n_ind.txt', 'r') as f:
            n_ind = int(f.readline())
        pilco = PILCO(X, Y, num_induced_points=n_ind)

    # `.item()` unwraps a 0-d object array.  Object arrays can only be loaded
    # with allow_pickle=True on NumPy >= 1.16.3 (the default became False).
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    params = np.load(path + "pilco_values.npy", allow_pickle=True).item()
    pilco.assign(params)

    for i, m in enumerate(pilco.mgpr.models):
        values = np.load(path + "model_" + str(i) + ".npy", allow_pickle=True).item()
        m.assign(values)

    return pilco
def test_cascade():
    """Check the multi-step prediction against the Octave/MATLAB ``pred``.

    A PILCO model is fitted on a small synthetic data set, a prediction over
    ``horizon`` steps is obtained through ``predict_wrapper``, and the
    resulting mean and covariance are compared with the last horizon step
    returned by ``octave.pred`` (relative tolerance 1e-4).
    """
    np.random.seed(0)
    d = 2  # State dimension
    k = 1  # Controller's output dimension
    horizon = 10
    e = np.array([[10.0]])  # Max control input. Set too low can lead to Cholesky failures.

    # Training dataset (the order of the random draws is significant for
    # reproducibility under the fixed seed).
    X0 = np.random.rand(100, d + k)
    A = np.random.rand(d + k, d)
    Y0 = np.sin(X0).dot(A) + 1e-3*(np.random.rand(100, d) - 0.5)  # Just something smooth
    pilco = PILCO(X0, Y0)
    pilco.controller.max_action = e
    pilco.optimize()

    # Generate input
    m = np.random.rand(1, d)  # But MATLAB defines it as m'
    s = np.random.rand(d, d)
    s = s.dot(s.T)  # Make s positive semidefinite

    # The third return value (reward) is not checked by this test.
    M, S, _ = predict_wrapper(pilco, m, s, horizon)

    # Convert the controller to the struct expected by the MATLAB implementation
    policy = oct2py.io.Struct()
    policy.p = oct2py.io.Struct()
    policy.p.w = pilco.controller.W.value
    policy.p.b = pilco.controller.b.value.T
    policy.maxU = e

    # Convert the GP hyperparameters to the struct expected by the MATLAB
    # implementation: log of [lengthscales, sqrt(kernel variance),
    # sqrt(likelihood variance)] per model, transposed.
    models = pilco.mgpr.models
    lengthscales = np.stack([model.kern.lengthscales.value for model in models])
    variance = np.stack([model.kern.variance.value for model in models])
    noise = np.stack([model.likelihood.variance.value for model in models])

    hyp = np.log(np.hstack(
        (lengthscales,
         np.sqrt(variance[:, None]),
         np.sqrt(noise[:, None]))
    )).T

    dynmodel = oct2py.io.Struct()
    dynmodel.hyp = hyp
    dynmodel.inputs = X0
    dynmodel.targets = Y0

    plant = oct2py.io.Struct()
    plant.angi = np.zeros(0)
    # Index vectors start at 1 (presumably Octave's 1-based indexing).
    plant.poli = np.arange(d) + 1
    plant.dyni = np.arange(d) + 1
    plant.difi = np.arange(d) + 1

    # Call function in octave
    M_mat, S_mat = octave.pred(policy, plant, dynmodel, m.T, s, horizon, nout=2, verbose=True)
    # Extract only last element of the horizon
    M_mat = M_mat[:, -1]
    S_mat = S_mat[:, :, -1]

    np.testing.assert_allclose(M[0], M_mat.T, rtol=1e-4)
    np.testing.assert_allclose(S, S_mat, rtol=1e-4)
def load_pilco(path, controller=None, reward=None, sparse=False):
    """Rebuild a PILCO object from files stored under the prefix ``path``.

    Files read (``path`` is prepended verbatim, so include a trailing
    separator when it names a directory):

    * ``X.csv`` / ``Y.csv`` -- comma-separated data passed to ``PILCO`` as
      the tuple ``(X, Y)``.
    * ``n_ind.txt`` -- only when ``sparse`` is true; its first line is parsed
      as the integer passed as ``num_induced_points``.
    * ``pilco_values.npy`` -- object handed to ``pilco.assign``.
    * ``model_<i>.npy`` -- object handed to ``assign`` of the i-th entry of
      ``pilco.mgpr.models``.

    Args:
        path: String prefix of the saved files.
        controller: Forwarded unchanged to ``PILCO(controller=...)``.
        reward: Forwarded unchanged to ``PILCO(reward=...)``.
        sparse: If true, construct PILCO with ``num_induced_points`` read
            from ``n_ind.txt``.

    Returns:
        The constructed PILCO instance with the stored values assigned.
    """
    # np.loadtxt has no `allow_pickle` parameter (passing it raises
    # TypeError); plain CSV text needs no pickling anyway.
    X = np.loadtxt(path + 'X.csv', delimiter=',')
    Y = np.loadtxt(path + 'Y.csv', delimiter=',')

    if not sparse:
        pilco = PILCO((X, Y), controller=controller, reward=reward)
    else:
        # The context manager closes the file; no explicit close() is needed.
        with open(path + 'n_ind.txt', 'r') as f:
            n_ind = int(f.readline())
        pilco = PILCO((X, Y), num_induced_points=n_ind, controller=controller, reward=reward)

    # `.item()` unwraps a 0-d object array.  Object arrays can only be loaded
    # with allow_pickle=True on NumPy >= 1.16.3 (the default became False).
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    params = np.load(path + "pilco_values.npy", allow_pickle=True).item()
    pilco.assign(params)

    for i, m in enumerate(pilco.mgpr.models):
        values = np.load(path + "model_" + str(i) + ".npy", allow_pickle=True).item()
        m.assign(values)

    return pilco
# Stack the additional targets Y_ underneath the existing ones.
Y = np.vstack((Y, Y_))

# Dimensions are derived from the data: the column count of Y is used as the
# state dimension, and the remaining columns of X as the control dimension.
state_dim = Y.shape[1]
control_dim = X.shape[1] - state_dim

policy = RBFNPolicy(
    state_dim,
    control_dim=control_dim,
    num_basis_fun=50,
    max_action=env.action_space.high[0],
)

# Parameters handed to the `cost` helper, which returns the target and the
# weight matrix used by the saturating cost.
a = 0.25
l = 0.6
j_target, iT = cost(a, l)
idxs = [0, 3, 4]
# NOTE: this rebinds the name `cost` from the helper to the cost object.
cost = SaturatingCost(j_target, iT, idxs)

pilco = PILCO(X, Y, policy=policy, cost=cost, horizon=40)
pilco.optimize1()

# Disabled experimentation code, kept for reference:
# pilco.predict()
# rollout(policy=pilco_policy, timesteps=100)
# print(Y)
# predictions = pilco.policy.predict(X[0, :])
# print(predictions)
# for i_episode in range(20):
#     pilco.optimize()
#     X_new, Y_new = rollout(policy=pilco_policy, timesteps=100)
#     X_new = X_new.reshape((X_new.shape[0], X_new.shape[2]))
num_basis_fun=10, max_action=env.action_space.high[0]) a = 0.25 l = 0.6 C = np.array([[1., l, 0.0], [0., 0., l]]) iT = a**(-2) * np.dot(C.T, C) cost = SaturatingCost(state_dim=3, state_idxs=[0, 3, 4], W=iT, t=[0., 0., -1.]) # print(state_dim) # cost = SaturatingCost(state_dim=state_dim) # cost = SaturatingCost( # x_target=[0., 0., -1.], iT=iT, state_dim=3, state_idxs=[0, 3, 4]) # cost = ExponentialReward(state_dim) pilco = PILCO(x, y, policy=RBFNPolicy, cost=cost, horizon=40) for rollouts in range(15): # Learn dynamics model & use it to simulate/optimise policy pilco.optimize() import pdb pdb.set_trace() # Execute policy on environment x_new, y_new = rollout(policy=pilco_policy, timesteps=100) # Update dataset x = np.vstack((x, x_new)) y = np.vstack((y, y_new)) pilco.dynamics_model.set_XY(x, y)