Example #1
        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
        # Return the policy distribution, its variables, and its parameters;
        # the original returned `mu` twice, which reads as a typo for
        # `mu, sigma` (the std-dev tensor built alongside `mu` above).
        return norm_dist, params, mu, sigma

    def choose_action(self, s):
        s = s[np.newaxis, :]  # add a batch dimension
        a = self.sess.run(self.sample_op, {self.tfs: s})[0]
        # print(self.sess.run(self.mu, {self.tfs: s})[0])
        return a  # optionally clip to the action bounds: np.clip(a, a_low, a_high)

    def get_v(self, s):
        if s.ndim < 2: s = s[np.newaxis, :]  # ensure a batch dimension
        return self.sess.run(self.v, {self.tfs: s})[0, 0]


# ENV_NAME = 'CartPole-v2'
env = QUADROTOR()
# env = gym.make(ENV_NAME)
env = env.unwrapped
a_high = env.force_space.high
a_low = env.force_space.low
ppo = PPO(a_high)
all_ep_r = []
fig = plt.figure()
ax = plt.subplot(projection='3d')  # create a 3D plotting axes
plot_x = []
plot_y = []
plot_z = []
var = 0.5 * a_high  # initial exploration noise scale
for i in range(MAX_EPISODES):
    ax = plt.subplot(projection='3d')  # re-create the 3D axes each episode
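    # --- Hedged continuation sketch (not in the original snippet): a typical
    # PPO episode body driving the choose_action/get_v methods above.
    # MAX_EP_STEPS, BATCH, GAMMA, ppo.update, and env.step's return signature
    # are assumptions here, not taken from this snippet.
    s = env.reset()
    buffer_s, buffer_a, buffer_r = [], [], []
    ep_r = 0
    for t in range(MAX_EP_STEPS):
        a = ppo.choose_action(s)
        s_, r, done = env.step(a)
        buffer_s.append(s); buffer_a.append(a); buffer_r.append(r)
        s = s_
        ep_r += r
        if (t + 1) % BATCH == 0 or t == MAX_EP_STEPS - 1:
            v_s_ = ppo.get_v(s_)  # bootstrap from the value of the last state
            discounted_r = []
            for r in buffer_r[::-1]:  # discounted returns, computed backwards
                v_s_ = r + GAMMA * v_s_
                discounted_r.append(v_s_)
            discounted_r.reverse()
            ppo.update(np.vstack(buffer_s), np.vstack(buffer_a),
                       np.array(discounted_r)[:, np.newaxis])
            buffer_s, buffer_a, buffer_r = [], [], []
    all_ep_r.append(ep_r)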
Example #2
import numpy as np

#####################  hyper parameters  ####################

MAX_EPISODES = 20000
MAX_EP_STEPS = 2500
LR_A = 0.0001  # learning rate for actor
LR_C = 0.0002  # learning rate for critic
GAMMA = 0.9988  # reward discount
TAU = 0.01  # soft replacement rate for the target networks
MEMORY_CAPACITY = 50000  # replay buffer size
BATCH_SIZE = 256
labda = 10.  # spelled this way because 'lambda' is reserved in Python
RENDER = True
tol = 0.001  # convergence tolerance
print(LR_A)
# ENV_NAME = 'CartPole-v2'
env = QUADROTOR()
# env = gym.make(ENV_NAME)
env = env.unwrapped

EWMA_p = 0.95  # smoothing factor for the moving averages below
EWMA_step = np.zeros((1, MAX_EPISODES + 1))  # EWMA of episode length
EWMA_reward = np.zeros((1, MAX_EPISODES + 1))  # EWMA of episode reward
iteration = np.zeros((1, MAX_EPISODES + 1))
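# A hedged sketch (not shown in this snippet) of how the buffers above are
# typically updated at the end of episode i, where j is the step count and
# ep_reward the accumulated reward -- both assumed loop variables:
#
#     EWMA_step[0, i + 1] = EWMA_p * EWMA_step[0, i] + (1 - EWMA_p) * j
#     EWMA_reward[0, i + 1] = EWMA_p * EWMA_reward[0, i] + (1 - EWMA_p) * ep_reward
#
# With EWMA_p = 0.95 this averages over roughly 1 / (1 - 0.95) = 20 episodes.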

###############################  DDPG  ####################################


class DDPG(object):
    def __init__(
        self,
        a_dim,
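        # (snippet truncated here) -- a hedged sketch of what TAU above drives:
        # the DDPG soft target update, written in the TF1 style of this code.
        # at_params / ct_params / ae_params / ce_params are assumed names for
        # the actor/critic target and eval variable lists, not from this snippet:
        #
        #     self.soft_replace = [tf.assign(t, (1 - TAU) * t + TAU * e)
        #                          for t, e in zip(self.at_params + self.ct_params,
        #                                          self.ae_params + self.ce_params)]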
Example #3
import matplotlib.pyplot as plt

# ax.set_xlabel('X')
# plt.draw()
# plt.pause(10000)
# plt.savefig('3D.jpg')
# plt.close()

#####################  trajectory test DISCRETE ####################
fig = plt.figure()
ax = plt.subplot(projection='3d')  # create a 3D plotting axes
plot_x = []
plot_y = []
plot_z = []
d_x = []
d_y = []
d_z = []
env = QUADROTOR()
traj = 'circle'
des_start = env.trajectory(traj, 0)  # desired state at t = 0
state = env.reset_traj(des_start[0], 0)  # start the quadrotor on the trajectory
t = 0
for step in range(3000):
    desired_state = env.trajectory(traj, t)
    print(desired_state[2])
    F, M = env.controller(desired_state, env.state)  # thrust F and moments M
    s_, _, _ = env.step(F, M)
    t = t + 0.005  # simulation timestep of 0.005 s
    # print(step,s_[0],s_[1],s_[2])
    plot_x.append(s_[0])
    plot_y.append(s_[1])
    plot_z.append(s_[2])
    d_x.append(desired_state[0][0])
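    # --- Hedged continuation sketch (not in the original snippet): the matching
    # desired-position appends and the final actual-vs-desired comparison plot,
    # assuming desired_state[0] holds the desired [x, y, z] position.
    d_y.append(desired_state[0][1])
    d_z.append(desired_state[0][2])

ax.plot3D(plot_x, plot_y, plot_z, label='actual')
ax.plot3D(d_x, d_y, d_z, linestyle='--', label='desired')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.legend()
plt.show()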