def main():
    """Run a ``DeepQAgent`` on ``PongNoFrameskip-v4`` for a fixed number of games.

    The environment is built with ``make_env`` and the agent is constructed
    from the hyperparameters defined at the top of the function. For each of
    the ``n_games`` episodes the agent picks actions until the environment
    reports ``done``.

    Unless ``load_checkpoint`` is set, every transition is stored and
    ``agent.learn()`` is called after each step, and ``agent.save_models()``
    is called whenever the mean score of the last (up to) 100 episodes beats
    the best mean seen so far. When ``load_checkpoint`` is set,
    ``agent.load_models()`` is called once up front and none of the
    store/learn/save calls are made.

    After the last episode the cumulative step counts, episode scores and
    per-episode epsilon values are handed to ``plot_learning_curve`` together
    with a file path under ``plots/``.
    """
    print("Start Atari games")

    environment_name = "PongNoFrameskip-v4"
    env = make_env(environment_name)

    # Run configuration.
    best_score = -np.inf
    load_checkpoint = False
    n_games = 500

    # Agent hyperparameters; they are passed positionally to DeepQAgent below.
    lr = 0.0001
    epsilon = 1
    gamma = 0.99
    input_dims = env.observation_space.shape
    n_actions = env.action_space.n
    eps_min = 0.01
    eps_dec = 5e-7
    replace = 1000  # presumably the target-network sync interval -- TODO confirm
    mem_size = 50000
    batch_size = 32
    chkpt_dir = "models/"
    algo = "DeepQAgent"

    agent = DeepQAgent(lr, n_actions, input_dims, chkpt_dir, epsilon, gamma,
                       mem_size, batch_size, eps_min, eps_dec, replace, algo,
                       environment_name)

    if load_checkpoint:
        agent.load_models()

    # The plot file name encodes algorithm, environment, learning rate and
    # the number of games.
    fname = agent.algo + "_" + agent.env_name + '_lr' + str(
        agent.lr) + "_" + str(n_games) + "_games"
    figure_file = "plots/" + fname + ".png"

    n_steps = 0
    scores, eps_history, steps_array = [], [], []

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()

        while not done:
            action = agent.get_action(observation)
            # The fourth element returned by env.step() is not used here.
            new_observation, reward, done, _ = env.step(action)
            score += reward

            if not load_checkpoint:
                agent.store_transition(observation, action, reward,
                                       new_observation, int(done))
                agent.learn()

            observation = new_observation
            n_steps += 1

        scores.append(score)
        steps_array.append(n_steps)

        # Mean over the most recent (up to) 100 episodes.
        avg_score = np.mean(scores[-100:])
        # NOTE: best_score is printed before it is updated below, so the log
        # shows the best average from *before* this episode.
        print(
            "episode ", i + 1, "score: ", score,
            "average score %.1f best score %.1f epsilon %.2f" %
            (avg_score, best_score, agent.epsilon), " steps ", n_steps)

        if avg_score > best_score:
            if not load_checkpoint:
                agent.save_models()
            best_score = avg_score

        eps_history.append(agent.epsilon)

    plot_learning_curve(steps_array, scores, eps_history, figure_file)
    print("End Atari games")
# Run agent ``A`` on ``env`` for ``n_episodes`` episodes, calling ``A.learn``
# after every step. Every 100th episode the mean score of the most recent
# (up to) 100 episodes is recorded in ``win_pct_list``; every 1000th episode
# that value and the agent's current ``eps`` are printed.
n_episodes = 10000
win_pct_list = []
scores = []
eps_history = []

for i in range(n_episodes):
    done = False
    score = 0
    s = env.reset()

    while not done:
        a = A.pick_action(s)
        s_, r, done, info = env.step(a)
        A.learn(s, a, r, s_)
        score += r
        s = s_

    scores.append(score)
    eps_history.append(A.eps)

    if i % 100 == 0:
        win_pct = np.mean(scores[-100:])
        win_pct_list.append(win_pct)
        if i % 1000 == 0:
            # '%.2f' prints two decimals; the previous '%2.f' meant width 2
            # with zero decimals and showed eps rounded to an integer.
            print('episode', i, 'win pct %.2f' % win_pct,
                  'eps %.2f' % A.eps)

#plt.plot(win_pct_list)
#plt.show()

# 1-based episode numbers.
x = list(range(1, n_episodes + 1))