def test():
    """Evaluate a pretrained DDPG agent on the environment.

    Loads weights from ./preTrained/ddpg (produced by train()), then plays
    max_episode episodes greedily (policy action only, no exploration
    noise), rendering every step and printing the total reward per episode.

    Raises:
        AssertionError: if the pretrained model directory does not exist.
    """
    # trained model directory
    directory = "./preTrained"
    filename = "ddpg"

    # initialize DDPG agent; lr=0 because no learning happens at test time.
    # NOTE(review): state_dim is observation size minus one — presumably the
    # agent preprocesses observations; matches train(). Confirm against DDPG.
    agent = DDPG(
        state_dim=env.observation_space.shape[0] - 1,
        # state_dim=env.observation_space.shape[0]
        action_dim=env.action_space.shape[0],
        action_bounds=env.action_space.high[0],
        lr=0)

    # load trained agent
    assert os.path.exists(
        directory), "Trained model does not exist, try running train.py first."
    agent.load(directory, filename)

    for epoch in range(max_episode):
        # reset environment
        state = env.reset()
        done = False
        rewards = 0
        while not done:
            # deterministic policy action (no exploration at test time)
            action = agent.select_action(state)
            # perform one step update on pendulum
            next_state, reward, done, _ = env.step(action)
            # go to next state
            state = next_state
            rewards += reward
            # render environment
            env.render()
        # FIX: was "{:3f}" (min-width 3, default 6 decimals); ".3f" gives
        # the intended 3 decimal places.
        print("Episode:{:2d}, Rewards:{:.3f}".format(epoch, rewards))
def train():
    """Train a DDPG agent with epsilon-greedy + Gaussian-noise exploration.

    Runs max_episode episodes; each step is stored in a replay buffer and
    the agent is updated 10 iterations per episode. When an episode's total
    reward exceeds -1.0 the task is considered solved and the model is
    saved under ./preTrained/ddpg.

    Returns:
        np.ndarray of shape (max_episode,): total reward of each episode
        (useful for plotting the learning curve).
    """
    # save trained model under preTrained directory
    directory = "./preTrained"
    filename = "ddpg"

    # epsilon-greedy schedule: linear decay from 0.2 down to a 1e-3 floor
    epsilon = 0.2
    eps_min = 1e-3
    eps_decay = 2e-3
    # std-dev of Gaussian noise added to the policy action when exploiting
    gaussian_exploration_noise = 0.2

    # learning rate and batch size
    lr = 1e-3
    batch_size = 128

    # initialize replay memory
    replay_buffer = ReplayBuffer(max_size=5e4)

    # rewards for each episode / for plot
    rewards = np.zeros(max_episode)

    # initialize DDPG agent
    # NOTE(review): state_dim is observation size minus one — presumably the
    # agent preprocesses observations; matches test(). Confirm against DDPG.
    agent = DDPG(
        state_dim=env.observation_space.shape[0] - 1,
        # state_dim=env.observation_space.shape[0]
        action_dim=env.action_space.shape[0],
        action_bounds=env.action_space.high[0],
        lr=lr)

    for epoch in range(max_episode):
        # reset environment
        state = env.reset()
        done = False

        # FIX: clamp epsilon at eps_min. The original only clamped when
        # (epsilon - eps_decay) < 0, so epsilon could decay below its
        # declared floor (e.g. to 0.0 or any value in (0, eps_min)).
        epsilon = max(eps_min, epsilon - eps_decay)

        while not done:
            if np.random.random_sample() > epsilon:
                # exploit: policy action plus Gaussian exploration noise
                action = agent.select_action(state)
                action = action + np.random.normal(0, gaussian_exploration_noise)
            else:
                # explore: uniform random action within the action bounds
                action = np.array(
                    np.random.uniform(env.action_space.low[0],
                                      env.action_space.high[0])).reshape(1, )
            # perform one step update on pendulum
            next_state, reward, done, _ = env.step(action)
            env.render()
            replay_buffer.add((state, action, reward, next_state, done))
            # go to next state
            state = next_state
            # store rewards
            rewards[epoch] += reward

        # update the DDPG agent sampled on replay buffer and n_iter times
        agent.update(buffer=replay_buffer, n_iter=10, batch_size=batch_size)

        if rewards[epoch] > -1.0:
            print("task solved!\n")
            # save trained agent
            if not os.path.exists(directory):
                os.mkdir(directory)
            agent.save(directory, filename)

        # print rewards of current episode
        if epoch % 10 == 0:
            print('train epoch:', epoch, 'rewards:', rewards[epoch])

    return rewards