def main():
    """Run the agent on ``CartPoleContinuousBulletEnv-v0`` for 250 episodes.

    With ``load_checkpoint`` False (the hard-coded setting) the agent learns
    after every environment step, its models are saved whenever the mean
    score over the last 100 episodes reaches a new best, and the score
    history is passed to ``plot_error_drop`` once all episodes are done.
    With ``load_checkpoint`` True the saved models are loaded and the
    environment is rendered instead; no learning, saving or plotting occurs.
    """
    # Candidate hyperparameter values. None of these lists is read anywhere
    # else in this function.
    actor_learning_rate = [
        1 * 10**-4, 3 * 10**-4, 6 * 10**-4, 10**-3, 3 * 10**-3, 6 * 10**-3,
        10**-2
    ]
    critic_learning_rate = [
        1 * 10**-4, 3 * 10**-4, 6 * 10**-4, 10**-3, 3 * 10**-3, 6 * 10**-3,
        10**-2
    ]
    tau = [.9, .93, .95, .97, .99]
    batch_size = [32, 64, 128, 256]
    p_rand = [0, .1, .2, .3, .4]
    sigma = [0, .1, .2, .3, .4]
    L2_norm_coeff = [0, .01, .03, .1, .3, .6, 1]

    load_checkpoint = False

    env = gym.make("CartPoleContinuousBulletEnv-v0")
    agent = Agent(
        input_dims=env.observation_space.shape,
        n_actions=env.action_space.shape[0],
    )
    episodes = 250

    # Output path for a figure; not referenced again in this function.
    filename = 'MoutainCarContinuous.png'
    figure_file = 'plots/' + filename

    # Start from the lowest reward the environment reports so that the first
    # running average can register as an improvement.
    best_score = env.reward_range[0]
    score_history = []

    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')

    for episode in range(episodes):
        state = env.reset()
        episode_return = 0
        done = False
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, _info = env.step(action)
            episode_return += reward
            agent.remember(state, action, reward, next_state, done)
            if load_checkpoint:
                env.render()
            else:
                agent.learn()
            state = next_state

        score_history.append(episode_return)
        # Running mean over (at most) the 100 most recent episodes.
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()
        print("episode", episode, "score", episode_return,
              "average score", avg_score)

    if not load_checkpoint:
        # 1-based episode numbers for the x axis.
        x = list(range(1, episodes + 1))
        plot_error_drop(x, score_history)
# Script: restore the ddpg_torch Agent from its saved models and run it on
# LunarLanderContinuous-v2, rendering every step, until the mean score of the
# 25 most recent episodes exceeds 200.
from ddpg_torch import Agent
import gym
import numpy as np
# Plotting helper import is disabled: from utils import PlotLearning

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(
    alpha=0.000025,
    beta=0.00025,
    input_dims=[8],
    tau=0.001,
    env=env,
)
np.random.seed(0)
agent.load_models()

score_history = []
ep = 0
while True:
    ep += 1
    obs = env.reset()
    score = 0
    done = False
    while not done:
        env.render()
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        # Learning is disabled in this script: agent.learn()
        score += reward
        obs = new_state

    score_history.append(score)
    print('episode', ep, 'score%.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))

    # Checkpoint every 25th episode.
    if ep % 25 == 0:
        agent.save_models()

    # The stop criterion needs at least 25 finished episodes.
    if len(score_history) < 25:
        continue
    if np.mean(score_history[-25:]) > 200:
        break

filename = 'pendulum.png'
# Plotting call is disabled: plotLearning(score_history, filename, window=100)
str(agent.beta) + '_' + str(n_games) + '_games' figure_file = 'plots/' + filename + '.png' if load_checkpoint: agent.load_models() best_score = env.reward_range[0] score_history = [] for i in range(n_games): observation = env.reset() observation=observation.reshape(observation.shape[0]*observation.shape[1],) done = False score = 0 agent.noise.reset() while not done: action = agent.choose_action(observation) observation_, reward, done, info = env.step(action) observation_ = observation_.reshape(observation_.shape[0]*observation_.shape[1],) if not test_agent: agent.remember(observation, action, reward, observation_, done) agent.learn() score += reward observation = observation_ env.render() score_history.append(score) avg_score = np.mean(score_history[-100:]) if not test_agent: if avg_score > best_score: best_score = avg_score