Пример #1
0
def train(env, env_name, agent, episodes, rpm):
    """Warm up ``rpm``, run training episodes, and checkpoint the best agent.

    ``run_episode`` is first called repeatedly until ``len(rpm)`` reaches
    ``opt["MEMORY_WARMUP_SIZE"]``. After that, ``episodes`` episodes are run;
    each episode's reward is written to ``writer`` under the tag ``env_name``.
    Every 10th episode is reported on stdout and through ``logging.warning``.
    Every 100th episode the agent is evaluated, and it is saved whenever the
    evaluation result is at least as good as the best one seen so far.

    Args:
        env: Environment handed to ``run_episode`` and ``evaluate``.
        env_name: Used as the scalar tag and as the argument to ``agent.save``.
        agent: Agent being trained; must provide ``save(name)``.
        episodes: Number of training episodes to run after warm-up.
        rpm: Sized container filled by ``run_episode`` (presumably the replay
            memory -- confirm against the caller).

    Returns:
        None.
    """
    report_template = "Episode {}, step {} Reward Sum {}."
    best_reward = -1e10

    # Warm-up phase: keep collecting experience until the buffer is big enough.
    while len(rpm) < opt["MEMORY_WARMUP_SIZE"]:
        run_episode(env, agent, rpm)

    for episode in range(episodes):
        steps, episode_reward = run_episode(env, agent, rpm)
        writer.add_scalar(env_name, episode_reward, episode)

        if episode % 10 == 0:
            report = report_template.format(episode, steps, episode_reward)
            print(report)
            logging.warning(report)

        # Only evaluate on every 100th episode (counting from 1).
        if (episode + 1) % 100 != 0:
            continue

        # First argument is presumably the number of evaluation episodes.
        eval_reward = evaluate(10, env, agent, render=False)
        if eval_reward >= best_reward:
            best_reward = eval_reward
            agent.save(env_name)
Пример #2
0
 def do_save(self, args):
     """Save the agent by calling ``agent.save()`` with no arguments.

     ``args`` is accepted but not used. ``agent`` is not read from ``self``;
     it is resolved from an enclosing or module-level scope.
     NOTE(review): the ``do_*`` name and ``(self, args)`` signature look like
     a ``cmd.Cmd`` command handler -- confirm against the enclosing class.
     """
     agent.save()
Пример #3
0
        # average angle in episode
        scores.append(score)

        if len(scores) > 20: scores = scores[-20:]
        if len(rewards) > 20: rewards = rewards[-20:]

        if highest_score == None or score < highest_score:
            highest_score = score

        print(
            "episode: {}/{}, average angle: {}, best average angle: {}, last 20 average: {}, e: {}, in memory: {}, batch size: {}"
            .format(e + 1, episode_count, round(score, 2),
                    round(highest_score, 2), round(mean(scores), 2),
                    round(agent.epsilon, 3), len(agent.memory), batch_size))

        if len(scores) >= 5 and score <= 10 and sum(scores[-5:]) <= 50:
            agent.save()

        if len(scores) >= 15 and sum(scores[-15:]) <= 75:
            print("training successfull!")
            agent.save("final")
            break

        if (e + 1) % 5 == 0:
            print("Took", round((time.time() - start) / 60, 2), "minutes\n")
            start = time.time()
            agent.merge_models()

    agent.save()
    print("Total training time:", round((time.time() - first_start) / 60, 2),
          "minutes")
Пример #4
0
# Print the environment's description before anything else runs.
env.print_info()

# Alternative agent implementation, kept for reference:
# agent = agent.Agent(env)
agent = agent_dqn.DQNAgent(env, "arkanoid_network.json")

training_iterations = 250000

# --- Training phase ---------------------------------------------------------
for iteration in range(training_iterations):
    agent.main()

    # Report progress on every 100th iteration, starting with iteration 0.
    if iteration % 100:
        continue
    percent_done = 100.0 * iteration / training_iterations
    print("training done = ", percent_done, " score = ", env.get_score())

agent.save("arkanoid_network/")

# --- Testing phase ----------------------------------------------------------
# Reset the score so the figure printed below covers only the test run.
env.reset_score()
# NOTE(review): presumably switches the agent to its best-action policy --
# confirm against DQNAgent.
agent.run_best_enable()

testing_iterations = 10000
for _ in range(testing_iterations):
    agent.main()

banner = "********************************"
print(banner)
print("testing score = ", env.get_score())
print(banner)

# --- Demo phase: step the agent and render until the process is stopped -----
while True:
    agent.main()
    env.render()