        # Tail of an actor-network builder (its `def` line is outside this chunk):
        # collect the trainable variables created under `scope=name`.
        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
        # NOTE(review): `mu` is returned twice — the fourth element is
        # presumably meant to be `sigma`; confirm against the caller that
        # unpacks this tuple before changing it.
        return norm_dist, params, mu, mu

    def choose_action(self, s):
        """Sample one action from the stochastic policy for a single state `s`.

        `s` is a 1-D observation vector; a batch dimension is prepended
        before feeding the TF placeholder. Returns the raw sampled action
        (clipping is commented out below).
        """
        s = s[np.newaxis, :]  # (obs_dim,) -> (1, obs_dim) batch for the graph
        a = self.sess.run(self.sample_op, {self.tfs: s})[0]
        # print(self.sess.run(self.mu, {self.tfs: s})[0])
        return a  # np.clip(a, a_low, a_high)

    def get_v(self, s):
        """Return the critic's scalar value estimate V(s) for a single state."""
        if s.ndim < 2:
            s = s[np.newaxis, :]  # ensure a batch dimension
        return self.sess.run(self.v, {self.tfs: s})[0, 0]


# ---- script-level setup (training entry for the PPO agent) ----
# ENV_NAME = 'CartPole-v2'
env = QUADROTOR()
# env = gym.make(ENV_NAME)
env = env.unwrapped
a_high = env.force_space.high  # per-dimension action (force) upper bound
a_low = env.force_space.low    # per-dimension action (force) lower bound
ppo = PPO(a_high)
all_ep_r = []

fig = plt.figure()
ax = plt.subplot(projection='3d')  # create a 3-D plotting axes
plot_x = []
plot_y = []
plot_z = []
fig = plt.figure()  # NOTE(review): second figure shadows the first — confirm intended
var = 0.5 * a_high  # exploration noise scale, half the action range
for i in range(MAX_EPISODES):
    # (episode loop body continues beyond this chunk)
    ax = plt.subplot(projection='3d')  # create a 3-D plotting axes
##################### hyper parameters #################### MAX_EPISODES = 20000 MAX_EP_STEPS = 2500 LR_A = 0.0001 # learning rate for actor LR_C = 0.0002 # learning rate for critic GAMMA = 0.9988 # reward discount TAU = 0.01 # soft replacement MEMORY_CAPACITY = 50000 BATCH_SIZE = 256 labda = 10. RENDER = True tol = 0.001 print(LR_A) # ENV_NAME = 'CartPole-v2' env = QUADROTOR() # env = gym.make(ENV_NAME) env = env.unwrapped EWMA_p = 0.95 EWMA_step = np.zeros((1, MAX_EPISODES + 1)) EWMA_reward = np.zeros((1, MAX_EPISODES + 1)) iteration = np.zeros((1, MAX_EPISODES + 1)) ############################### DDPG #################################### class DDPG(object): def __init__( self, a_dim,
# ax.set_xlabel('X') # plt.draw() # plt.pause(10000) # plt.savefig('3D.jpg') # plt.close() ##################### trajactory test DISCRETE #################### fig = plt.figure() ax = plt.subplot(projection='3d') # 创建一个三维的绘图工程 plot_x = [] plot_y = [] plot_z = [] d_x = [] d_y = [] d_z = [] env = QUADROTOR() traj = 'circle' des_start = env.trajectory(traj, 0) state = env.reset_traj(des_start[0], 0) t = 0 for step in range(3000): desired_state = env.trajectory(traj, t) print(desired_state[2]) F, M = env.controller(desired_state, env.state) s_, _, _ = env.step(F, M) t = t + 0.005 # print(step,s_[0],s_[1],s_[2]) plot_x.append(s_[0]) plot_y.append(s_[1]) plot_z.append(s_[2]) d_x.append(desired_state[0][0])