from ddpg import Agent
import gym
import numpy as np

# Train a DDPG agent on the continuous LunarLander task.
# alpha/beta are the actor/critic learning rates; tau is the soft-update
# coefficient for the target networks.
env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)
np.random.seed(42)

score_history = []
for i in range(1000):
    done = False
    score = 0
    obs = env.reset()
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        # done is stored as int so it can be used as a terminal mask
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print("Episode - {} Score - {} 100 game average {}".format(
        i, score, np.mean(score_history[-100:])))
    if i % 25 == 0:
        # Periodic checkpoint of actor/critic (and target) weights.
        agent.save_models()

# Original chunk was truncated at "filename = l"; completed using the
# filename and plotLearning(...) call the sibling script uses for these
# exact hyperparameters — TODO confirm intended filename.
# NOTE(review): plotLearning is presumably imported from a utils module
# outside this view; verify the import exists at file top.
filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
# NOTE(review): this chunk begins mid-file — the enclosing episode loop
# ("for i in ...") and the agent/env setup are out of view; `i`, `agent`,
# `env`, `np`, `score_history`, and `plotLearning` are assumed bound upstream.
env_params = env.reset()
obs = env_params['observation']  # Remove 'observation' indexing for envs with no dict
d_goal = env_params['desired_goal']
# The network input is the observation concatenated with the desired goal.
net_input = np.hstack((obs, d_goal))
# print(obs.shape)
done = False
score = 0
while not done:
    act = agent.choose_action(net_input)
    # print(act)
    new_state, reward, done, info = env.step(act)
    # Re-build the goal-conditioned input from the dict-valued next state.
    new_state = np.hstack((new_state['observation'],
                           new_state['desired_goal']))  ## Remove 'observation' indexing for envs with no dict
    agent.remember(net_input, act, reward, new_state, int(done))
    agent.learn()
    score += reward
    net_input = new_state
    # env.render()
score_history.append(score)
if i % 25 == 0:
    agent.save_models()
print('episode ', i, 'score %.2f' % score,
      'trailing 100 games avg %.3f' % np.mean(score_history[-100:]))
filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
# Resolve the figure output path relative to this file's directory.
base_dir: str = os.path.dirname(__file__)
figure_file = os.path.abspath(os.path.join(base_dir, 'plots/pendulum.png'))

# reward_range[0] is the lowest reachable reward — a safe initial "best".
best_score = env.reward_range[0]
score_history = []
load_checkpoint = False

if load_checkpoint:
    # Prime the replay buffer with one batch of random transitions so a
    # single learn() call can run (building the graphs) before the saved
    # weights are restored.
    n_steps = 0
    while n_steps <= agent.batch_size:
        observation = env.reset()
        action = env.action_space.sample()
        observation_, reward, done, info = env.step(action)
        agent.remember(observation, action, reward, observation_, done)
        n_steps += 1
    agent.learn()
    agent.load_models()
    evaluate = True  # deterministic action selection when evaluating
else:
    evaluate = False

for i in range(n_episodes):  # n_episodes is defined outside this chunk
    observation = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.choose_action(observation, evaluate)
        env.render()
        observation_, reward, done, info = env.step(action)
        # NOTE(review): chunk ends here mid-loop — the rest of the episode
        # body (remember/learn/score bookkeeping) is outside this view.
# NOTE(review): whitespace-mangled chunk that begins MID-CALL — the
# "agent = Agent(" opener and its leading keyword arguments (learning
# rates etc.) are outside this view, so the fragment is left verbatim
# rather than reconstructed from a guess. Logically it is: the tail of an
# Agent(...) constructor for a 3-dim-observation / 1-dim-action env
# (Pendulum, presumably — TODO confirm), followed by a 1000-episode DDPG
# training loop that stores transitions, learns each step, tracks a
# per-episode score, prints a trailing-100 average, and finally plots the
# learning curve to 'pendulum.png' via plotLearning.
input_dims=[3], tau=0.001, env=env, n_actions=1) np.random.seed(0) score_history = [] for episode in range(1000): state = env.reset() done = False score = 0 while not done: action = agent.choose_action(state) next_state, reward, done, info = env.step(action) agent.remember(state, action, reward, next_state, int(done)) agent.learn() score += reward state = next_state score_history.append(score) print('Episode {}, Score: {:.2f}, 100 game average: {:.2f}'.format( episode, score, np.mean(score_history[-100:]))) filename = 'pendulum.png' plotLearning(score_history, filename, window=100)
# Train for n_episodes, then plot a trailing mean of the episode rewards.
# `n_episodes`, `env`, `agent`, `env_name`, `np`, and `plt` are bound
# outside this chunk.
history = []
critic_loss = []  # NOTE(review): never appended to in this chunk — possibly filled elsewhere; kept for safety
actor_loss = []   # NOTE(review): never appended to in this chunk — possibly filled elsewhere; kept for safety

# Removed dead per-episode locals `loss1`/`loss2`: they were re-created
# each episode and never read anywhere in this chunk.
for i in range(n_episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, _ = env.step(action)
        score += reward
        agent.remember(obs, action, reward, obs_, done)
        agent.learn()
        obs = obs_
    history.append(score)

# Trailing mean over a window of up to 26 scores
# (the current episode plus at most the 25 preceding it).
avg = np.zeros(len(history))
for i in range(len(avg)):
    avg[i] = np.mean(history[max(0, i - 25):(i + 1)])

plt.plot(range(1, n_episodes + 1), avg)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title(f'{env_name} Showing Mean Reward')
# NOTE(review): assumes a 'results/' directory already exists — verify.
plt.savefig(f'results/{env_name}-DDPG-Showing-Mean-Rewards-b128.jpg')
plt.show()