def train(env, env_name, agent, episodes, rpm):
    """Warm up ``rpm``, then run ``episodes`` training episodes.

    Every episode's reward sum is written to the module-level ``writer``
    under the tag ``env_name``. Every 10th episode a progress line is
    printed and logged. After every 100th episode the agent is evaluated
    and saved under ``env_name`` whenever the evaluation result is at
    least as good as the best one seen so far.

    Args:
        env: Environment handed to ``run_episode`` and ``evaluate``.
        env_name: Tag for the scalar log and argument to ``agent.save``.
        agent: Object passed to ``run_episode``/``evaluate``; must offer
            ``save(name)``.
        episodes: Number of training episodes to run after warm-up.
        rpm: Sized container filled by ``run_episode`` (presumably the
            replay memory -- confirm against the caller).

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm that the log call belongs to the
    every-10-episodes branch and the save to the evaluation branch.
    """
    best_eval_reward = -1e10

    # Keep running episodes until the warm-up size from ``opt`` is reached.
    while len(rpm) < opt["MEMORY_WARMUP_SIZE"]:
        run_episode(env, agent, rpm)

    for episode in range(episodes):
        step, total_reward = run_episode(env, agent, rpm)
        writer.add_scalar(env_name, total_reward, episode)

        if episode % 10 == 0:
            message = "Episode {}, step {} Reward Sum {}.".format(
                episode, step, total_reward)
            print(message)
            logging.warning(message)

        # Only every 100th episode (counted from 1) triggers an evaluation.
        if (episode + 1) % 100 != 0:
            continue

        eval_reward = evaluate(10, env, agent, render=False)
        # ">=" means a tie with the current best also overwrites the save.
        if eval_reward >= best_eval_reward:
            best_eval_reward = eval_reward
            agent.save(env_name)
def do_save(self, args):
    # Save handler: calls save() on the ``agent`` name resolved from the
    # enclosing scope (not an attribute of ``self``). ``args`` is accepted
    # but unused, and the method returns None.
    # NOTE(review): the do_* name suggests a cmd.Cmd command; if so, a
    # docstring here would become the command's help text, so the
    # documentation is deliberately kept in comments -- confirm.
    agent.save()
# Fragment of a per-episode training loop; the loop header that defines
# ``e`` / ``episode_count`` and owns the ``break`` is not visible here.
# What the statements on the line below do, in order:
#   * append ``score`` to ``scores``; trim ``scores`` and ``rewards`` to
#     their last 20 entries;
#   * keep the smallest score seen so far in ``highest_score`` (it is
#     replaced when ``score < highest_score``), reported as
#     "best average angle";
#   * print a progress line (episode, score, best, mean of ``scores``,
#     ``agent.epsilon``, ``len(agent.memory)``, ``batch_size``);
#   * call ``agent.save()`` when at least 5 scores exist, ``score <= 10``
#     and the last 5 scores sum to <= 50;
#   * when at least 15 scores exist and the last 15 sum to <= 75: print a
#     success message, call ``agent.save("final")`` and ``break``;
#   * when ``(e + 1) % 5 == 0``: print the minutes elapsed since ``start``
#     and reset ``start``;
#   * call ``agent.merge_models()``, ``agent.save()`` and print the total
#     minutes since ``first_start``.
# NOTE(review): whitespace was collapsed, so the nesting is not
# recoverable from this line alone -- presumably ``agent.merge_models()``
# sits in the every-5-episodes branch and the final ``agent.save()`` /
# total-time print run after the loop; confirm against the original file.
# NOTE(review): as written the whole line is a comment to the interpreter.
# NOTE(review): ``highest_score == None`` should be ``is None``; the name
# ``highest_score`` actually tracks a minimum; "successfull" is misspelled
# in the printed message.
# average angle in episode scores.append(score) if len(scores) > 20: scores = scores[-20:] if len(rewards) > 20: rewards = rewards[-20:] if highest_score == None or score < highest_score: highest_score = score print( "episode: {}/{}, average angle: {}, best average angle: {}, last 20 average: {}, e: {}, in memory: {}, batch size: {}" .format(e + 1, episode_count, round(score, 2), round(highest_score, 2), round(mean(scores), 2), round(agent.epsilon, 3), len(agent.memory), batch_size)) if len(scores) >= 5 and score <= 10 and sum(scores[-5:]) <= 50: agent.save() if len(scores) >= 15 and sum(scores[-15:]) <= 75: print("training successfull!") agent.save("final") break if (e + 1) % 5 == 0: print("Took", round((time.time() - start) / 60, 2), "minutes\n") start = time.time() agent.merge_models() agent.save() print("Total training time:", round((time.time() - first_start) / 60, 2), "minutes")
# Script: train a DQN agent on ``env``, save it, score a test run, then
# keep stepping the agent while rendering.
# NOTE(review): statement nesting was reconstructed from a
# whitespace-collapsed source; in particular confirm that the save happens
# once after the training loop rather than inside the progress branch.
env.print_info()

# Disabled alternative kept from the original: agent = agent.Agent(env)
agent = agent_dqn.DQNAgent(env, "arkanoid_network.json")

# --- training phase ---
training_iterations = 250000
for i in range(training_iterations):
    agent.main()

    # Report progress only on every 100th iteration.
    if i % 100 != 0:
        continue
    progress = 100.0 * i / training_iterations
    print("training done = ", progress, " score = ", env.get_score())

agent.save("arkanoid_network/")

# --- testing phase ---
# Reset the score so the value reported below covers only the test run.
env.reset_score()
# Presumably switches the agent to its best-action mode -- confirm.
agent.run_best_enable()

testing_iterations = 10000
for i in range(testing_iterations):
    agent.main()

print("********************************")
print("testing score = ", env.get_score())
print("********************************")

# --- demo phase: never returns ---
while True:
    agent.main()
    env.render()