def main():
    actor_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    critic_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    tau = [.9, .93, .95, .97, .99]
    batch_size = [32, 64, 128, 256]
    p_rand = [0, .1, .2, .3, .4]
    sigma = [0, .1, .2, .3, .4]
    L2_norm_coeff = [0, .01, .03, .1, .3, .6, 1]

    load_checkpoint = False
    epochs = 40

    env = gym.make("FetchReach-v1")
    agent = Agent(n_actions=env.action_space.shape[0],
                  load_checkpoint=load_checkpoint, env=env, epochs=epochs)

    if not load_checkpoint:
        score_history = agent.train()
    else:
        agent.load_models()
        agent.env.render(mode='human')
        agent.episodes = 40
        score_history = agent.eval_agent()

    if not load_checkpoint:
        x = [i + 1 for i in range(epochs)]
        plot_error_drop(x, score_history)
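# The hyperparameter lists above are declared but never sampled. A minimal
# random-search sketch of their apparent intended use (an assumption, not
# part of the original snippet), plus an entry-point guard since main() is
# defined but never called here:
import random

def sample_config():
    return {
        'actor_lr': random.choice([1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]),
        'critic_lr': random.choice([1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]),
        'tau': random.choice([.9, .93, .95, .97, .99]),
        'batch_size': random.choice([32, 64, 128, 256]),
        'p_rand': random.choice([0, .1, .2, .3, .4]),
        'sigma': random.choice([0, .1, .2, .3, .4]),
        'L2_norm_coeff': random.choice([0, .01, .03, .1, .3, .6, 1]),
    }

if __name__ == '__main__':
    main()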
from ddpg_torch import Agent
import gym
import numpy as np
#from utils import plotLearning

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env)
np.random.seed(0)

agent.load_models()

score_history = []
ep = 0
while True:
    ep += 1
    done = False
    score = 0
    obs = env.reset()
    while not done:
        env.render()
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        #agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', ep, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
    if ep % 25 == 0:
        agent.save_models()
    if len(score_history) >= 25 and np.mean(score_history[-25:]) > 200:
        break

filename = 'pendulum.png'
#plotLearning(score_history, filename, window=100)
        self.state = [0, 0, 0]
        self.steps = 0
        self.count = 0
        return np.array(self.state)


if __name__ == '__main__':
    setpoint = [18] * 300
    env = Process()
    n_games = 200
    fc1_dims = 400
    fc2_dims = 300
    agent = Agent(alpha=0.001, beta=0.001,
                  input_dims=env.observation_space.shape, tau=0.01,
                  batch_size=128, n_actions=env.action_space.shape[0],
                  fc1_dims=fc1_dims, fc2_dims=fc2_dims)
    f_name = (f'Process_alpha_{agent.alpha}_beta_{agent.beta}_{n_games}_games'
              f'_fc1_dims_{fc1_dims}_fc2_dims_{fc2_dims}')
    figure_file = 'plots/' + f_name + '.png'
    best_score = env.reward_range[0]
    scores = []
    Actions = []
    Observations = []
    for i in tqdm(range(n_games)):
        obs = env.reset()
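        # Hedged completion of the truncated episode body, following the step
        # pattern used by the sibling scripts in this repo (assumed, not from
        # the original snippet):
        done = False
        score = 0
        while not done:
            action = agent.choose_action(obs)
            obs_, reward, done, info = env.step(action)
            agent.remember(obs, action, reward, obs_, done)
            agent.learn()
            score += reward
            Actions.append(action)
            Observations.append(obs_)
            obs = obs_
        scores.append(score)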
"TIME_STEP": 0.01, "VIDEO_FPS": 30, } # env = LinkageEnv(angles_file, params, verbose=0) env = gym.make("LunarLanderContinuous-v2") agent = Agent( lr_actor=0.000025, lr_critic=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=2, ) np.random.seed(0) score_history = [] liveloss = PlotLosses() for i in range(100000): done = False score = 0
    },
    'duration': 3000,
    'offroad_terminal': True,
    'policy_frequency': 10,
    'simulation_frequency': 10,
    'vehicles_count': 20,
}

env = gym.make('highway-v0')
env.configure(config)
observation = env.reset()
observation = observation.reshape(observation.shape[0] * observation.shape[1],)
print(observation.shape)
print(env.action_space.sample())
print(env.config)

agent = Agent(alpha=0.0001, beta=0.001, input_dims=observation.shape,
              tau=0.001, batch_size=64, fc1_dims=400, fc2_dims=300,
              n_actions=env.action_space.shape[0])

n_games = 1000
test_agent = True
load_checkpoint = False
if test_agent:
    n_games = 100
    load_checkpoint = True

filename = 'Highway_alpha_' + str(agent.alpha) + '_beta_' + \
           str(agent.beta) + '_' + str(n_games) + '_games'
figure_file = 'plots/' + filename + '.png'

if load_checkpoint:
    agent.load_models()
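# highway-env's Kinematics observation is a (vehicles, features) matrix;
# reset() is flattened above, and every step() observation needs the same
# reshape before it is fed to the agent. A small helper (an addition, not in
# the original snippet):
def flatten_obs(obs):
    return obs.reshape(obs.shape[0] * obs.shape[1],)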
from ddpg_torch import Agent
import gym
import numpy as np
from utils import plotLearning

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

#agent.load_models()
np.random.seed(0)

score_history = []
for i in range(5000):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
import gym
import numpy as np
import matplotlib.pyplot as plt

from ddpg_torch import Agent  # module name as in the sibling scripts (assumed)


def plotLearning(scores, filename, x=None, window=5):
    # Plot the running average of the last `window` scores.
    N = len(scores)
    running_avg = np.empty(N)
    for t in range(N):
        running_avg[t] = np.mean(scores[max(0, t - window):(t + 1)])
    if x is None:
        x = [i for i in range(N)]
    plt.ylabel('Score')
    plt.xlabel('Game')
    plt.plot(x, running_avg)
    plt.savefig(filename)
    plt.close()


env = gym.make('MountainCarContinuous-v0')
agent = Agent(alpha=0.033, beta=0.33, input_dims=[2], tau=0.001, env=env,
              batch_size=64, layer1_size=75, layer2_size=50, n_actions=1)
#agent.load_models()
np.random.seed(3)

score_history = []
for i in range(150):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state  # advance the state; without this the loop replays the initial observation
    score_history.append(score)

# Plot the learning curve ('mountaincar.png' is a placeholder name, not from
# the original snippet).
plotLearning(score_history, 'mountaincar.png')
def main():
    actor_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    critic_learning_rate = [1e-4, 3e-4, 6e-4, 1e-3, 3e-3, 6e-3, 1e-2]
    tau = [.9, .93, .95, .97, .99]
    batch_size = [32, 64, 128, 256]
    p_rand = [0, .1, .2, .3, .4]
    sigma = [0, .1, .2, .3, .4]
    L2_norm_coeff = [0, .01, .03, .1, .3, .6, 1]

    load_checkpoint = False

    env = gym.make("CartPoleContinuousBulletEnv-v0")
    agent = Agent(input_dims=env.observation_space.shape,
                  n_actions=env.action_space.shape[0])
    episodes = 250
    filename = 'CartPoleContinuous.png'
    figure_file = 'plots/' + filename
    best_score = env.reward_range[0]
    score_history = []

    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')

    for i in range(episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            if not load_checkpoint:
                agent.learn()
            else:
                env.render()
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()
        print("episode", i, "score", score, "average score", avg_score)

    if not load_checkpoint:
        x = [i + 1 for i in range(episodes)]
        plot_error_drop(x, score_history)
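# Entry-point guard (an addition; the original snippet defines main() but
# never calls it):
if __name__ == '__main__':
    main()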
import gym
import gym_lqr
import numpy as np
import pybullet_envs  # registers the *PyBulletEnv-v0 environments with gym
from ddpg_torch import Agent  # module name as in the sibling scripts (assumed)

#env = gym.make('gym_lqr:lqr-stochastic-v0')
#env = gym.make('gym_lqr:lqr-2d-v0')
#env = gym.make('gym_lqr:lqr-v0')
#env = gym.make('InvertedPendulumPyBulletEnv-v0')
#env = gym.make('InvertedPendulum-v2')
env = gym.make('Walker2DPyBulletEnv-v0')
#env = gym.make('Ant-v2')

#print(env.action_space.shape[0])
print(env.action_space.high)

agent = Agent(alpha=0.000025, beta=0.00025,
              input_dims=[env.observation_space.shape[0]], tau=0.001, env=env,
              batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=env.action_space.shape[0],
              action_bound=env.action_space.high)

n_games = 3000

# uncomment this line and do a mkdir tmp && mkdir video if you want to
# record video of the agent playing the game.
#env = wrappers.Monitor(env, 'tmp/video', video_callable=lambda episode_id: True, force=True)

filename = 'inverted_pendulum.png'
#print(env.action_space.high)
figure_file = 'plots/' + filename
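# The training loop is truncated here. A minimal sketch following the
# episode pattern of the sibling scripts in this repo (assumed, not from the
# original snippet):
score_history = []
for i in range(n_games):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print('episode', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))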