# NOTE(review): this span was a whitespace-mangled paste — the whole script sat
# on one physical line, so everything after the first '#' was one giant comment
# and never executed. Newlines restored below; original Chinese comments
# translated to English. DDPG and the constants MEMORY_CAPACITY, BATCH_SIZE,
# GAMMA, ALPHA_A, ALPHA_C, TAO, MAX_EPISODES, MAX_EP_STEPS are presumably
# defined earlier in the file — not visible here, verify against full source.
VAR = 3        # variance of the random (exploration) part of the policy
KESI = 0.9995  # decay factor for the exploration-noise variance
RENDER = True  # whether to render the environment
# endregion

env = gym.make('Pendulum-v0')
s_dim = env.observation_space.shape[0]                 # state-space dimension
a_dim = env.action_space.shape[0]                      # action-space dimension
a_bound = env.action_space.low, env.action_space.high  # action lower/upper bounds

ddpg = DDPG(s_dim, a_dim, a_bound, MEMORY_CAPACITY, BATCH_SIZE, GAMMA,
            ALPHA_A, ALPHA_C, TAO)
ddpg.initail_net('./result.ckpt')  # [sic] original (misspelled) method name kept

for each_episode in range(MAX_EPISODES):
    ep_reward = 0
    s = env.reset()
    for each_step in range(MAX_EP_STEPS):
        if RENDER:
            env.render()
        # choose_action takes a batch, hence s[np.newaxis, :]; [0] unwraps
        # the single action so env.step receives one action, not a batch.
        a = ddpg.choose_action(s[np.newaxis, :])[0]
        print(a)
        s_, r, done, _ = env.step(a)
        # NOTE(review): SOURCE is truncated here — the rest of the loop body
        # (reward accumulation, replay storage, learning step) is not visible.
# NOTE(review): this span was a whitespace-mangled paste — the whole script sat
# on one physical line, so everything after the first '#' was one giant comment
# and never executed. Newlines restored below; original Chinese comments
# translated to English. DDPG and the constants MEMORY_CAPACITY, BATCH_SIZE,
# GAMMA, ALPHA_A, ALPHA_C, TAO, MAX_EPISODES, MAX_EP_STEPS are presumably
# defined earlier in the file — not visible here, verify against full source.
VAR = 3         # variance of the random (exploration) part of the policy
KESI = .99995   # decay factor for the exploration-noise variance
RENDER = True   # whether to render the environment
# endregion

env = gym.make('Pendulum-v0')
env.seed(1)  # fixed seed for reproducibility
s_dim = env.observation_space.shape[0]                 # state-space dimension
a_dim = env.action_space.shape[0]                      # action-space dimension
a_bound = env.action_space.low, env.action_space.high  # action lower/upper bounds

ddpg = DDPG(s_dim, a_dim, a_bound, MEMORY_CAPACITY, BATCH_SIZE, GAMMA,
            ALPHA_A, ALPHA_C, TAO)
ddpg.initail_net()  # [sic] original (misspelled) method name kept
var = VAR           # working copy of the exploration variance (decayed by KESI)

for each_episode in range(MAX_EPISODES):
    ep_reward = 0
    s = env.reset()
    for each_step in range(MAX_EP_STEPS):
        if RENDER:
            env.render()
        # Choose an action from the current state (exploration noise is added
        # elsewhere). The [0] index is required because env.step accepts only
        # a single action, while choose_action returns a batch.
        a = ddpg.choose_action(s[np.newaxis, :])[0]
        # NOTE(review): SOURCE is truncated here — the rest of the loop body
        # (env.step, reward accumulation, replay storage, learning, variance
        # decay via KESI) is not visible.