# Train a DDPG agent on Gym's LunarLanderContinuous-v2.
#
# Runs 1000 episodes; after each episode prints the episode score and the
# rolling 100-episode mean, and checkpoints the networks every 25 episodes
# via agent.save_models().
from ddpg import Agent
import gym
import numpy as np

env = gym.make('LunarLanderContinuous-v2')

# alpha/beta are the actor/critic learning rates; tau is the soft-update
# coefficient for the target networks; input_dims=[8] matches the
# LunarLander observation vector and n_actions=2 its continuous action space.
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

np.random.seed(42)  # reproducibility of numpy-driven sampling/noise

score_history = []
for i in range(1000):
    done = False
    score = 0
    obs = env.reset()
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        # Store the transition; the done flag is cast to int for the
        # replay buffer's terminal mask.
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    print("Episode - {} Score - {} 100 game average {}".format(
        i, score, np.mean(score_history[-100:])))
    if i % 25 == 0:
        agent.save_models()

# NOTE(review): the original source was truncated here mid-statement
# ("filename = l…"); as written it referenced the undefined name `l` and
# would raise NameError after training. Left disabled until the intended
# value (presumably an output filename, e.g. for a score plot) is restored.
# filename = l
# NOTE(review): whitespace-collapsed fragment, cut at BOTH ends — it begins
# inside the body of a step loop whose header is not visible, and ends inside
# a `for i in range(total_steps):` loop that continues past this chunk.
# Because the original indentation is lost, the nesting of the `if` blocks
# (e.g. where `if reward > best_parameters[1]:` ends, whether `exit = True`
# belongs to it) cannot be reconstructed reliably; the code is left
# byte-identical. TODO: recover the original file to restore indentation.
#
# What is visible, in order:
#  - reward bookkeeping: appends to `normalized` and `rewards`; tracks the
#    best (action, reward, step-count) triple in `best_parameters` and the
#    step total in `all_steps[episode]`; sets `exit = True` (shadows the
#    builtin `exit` — presumably a loop-termination flag; verify in caller).
#  - rewards above -10000 feed `inlook`, whose trailing-10 mean goes into
#    `avg_rewards`; a positive reward sets `done = True`.
#  - per-step: `env.render()`, `agent.learn(batch_size)`, state update.
#  - then a fresh section begins: `inlook2`/`avg_rewards2` accumulators and a
#    single-episode loop that resets `noise`, zeroes `eps_reward`, reinits
#    `best_parameters`, and starts a step loop where `setpoint` is fixed at
#    20 unless `random` is truthy (note: `random` here is presumably a bool
#    flag that shadows the stdlib `random` module — confirm against caller;
#    `random == False` would be more idiomatic as `not random`).
normalized.append(reward) if reward > best_parameters[1]: best_parameters[0] = action best_parameters[1] = reward best_parameters[2] = step*(episode + 1) all_steps[episode] = (step*episode+step) exit = True rewards.append(reward) if reward > -10000: inlook.append(reward) avg_rewards.append(np.mean(inlook[-10:])) if reward > 0: done = True env.render() agent.learn(batch_size) state = new_state inlook2 = [] avg_rewards2 = [] for episode in range(1): noise.reset() eps_reward = 0 print(episode) best_parameters = [(0,0,0), 0, 0] step = 0 for i in range(total_steps): print(step) step += 1 setpoint = 20 if random == False else np.random.random()*100