Example #1
import gym
import numpy as np

# NOTE: the head of this example was truncated. The imports, the module
# the Agent comes from, and the environment below are assumptions added
# so the snippet runs; only the four constructor arguments are original.
from ddpg_agent import Agent   # assumed module name
env = gym.make('Hopper-v2')    # assumed env with a 3-dim action space

agent = Agent(batch_size=64,
              layer1_size=256,
              layer2_size=128,
              n_actions=3)

#agent.load_models()
# np.random.seed(1)

score_history = []
for i in range(50):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        print(act)  # debug: log the chosen action each step
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        #env.render()
    score_history.append(score)

    if i % 10 == 0:
        agent.save_models()
        env.render()

    print('episode ', i, 'score %.2f' % score,
          'trailing 25 games avg %.3f' % np.mean(score_history[-25:]))
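
The loop above relies on only four Agent methods: choose_action, remember, learn, and save_models. Below is a minimal sketch of that interface, assuming a list-based replay buffer and a random placeholder policy; none of the internals are the original implementation, which is presumably an actor-critic agent.

import numpy as np

class Agent:
    """Skeleton matching the interface used in Example #1."""

    def __init__(self, batch_size=64, layer1_size=256,
                 layer2_size=128, n_actions=3, max_size=100_000):
        self.batch_size = batch_size
        self.n_actions = n_actions
        # layer sizes would dimension the hidden layers of the
        # actor/critic networks in a real implementation
        self.layer1_size = layer1_size
        self.layer2_size = layer2_size
        self.max_size = max_size
        self.memory = []   # simplistic list-based replay buffer

    def choose_action(self, obs):
        # Placeholder: random action in [-1, 1]; the real agent would
        # query its actor network here.
        return np.random.uniform(-1.0, 1.0, self.n_actions)

    def remember(self, state, action, reward, new_state, done):
        # Drop the oldest transition once the buffer is full.
        if len(self.memory) >= self.max_size:
            self.memory.pop(0)
        self.memory.append((state, action, reward, new_state, done))

    def learn(self):
        # Skip updates until one full batch of experience exists.
        if len(self.memory) < self.batch_size:
            return
        idx = np.random.choice(len(self.memory), self.batch_size,
                               replace=False)
        batch = [self.memory[i] for i in idx]
        # ... actor/critic updates would go here ...

    def save_models(self, path=None):
        pass  # checkpointing omitted in this sketch

Calling learn() on every environment step, as the loop does, is standard for off-policy agents: each new transition triggers one gradient update drawn from the whole buffer.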

Example #2

import numpy as np

# NOTE: the opening of this example was truncated. The wrapper function
# and loop head below are a hypothetical reconstruction so the fragment
# parses; choose_action is assumed by analogy with Example #1.
def train(agent, env, n_games, data_path):
    best_score = -np.inf
    score_history = []
    avg_history = []

    for i in range(n_games):
        state = env.reset()
        done = False
        score = 0

        while not done:
            action = agent.choose_action(state)   # assumed method name
            next_state, reward, done, _ = env.step(action)
            agent.memory.add(state, action, reward, next_state, done)

            state = next_state
            score += reward

            agent.optimize()

        score_history.append(score)
        avg_score: float = np.mean(score_history[-100:])
        avg_history.append(avg_score)

        if avg_score > best_score:
            best_score = avg_score
            agent.save_models(data_path)
            print(f'Episode:{i}'
                  f'\t ACC. Rewards: {score:3.2f}'
                  f'\t AVG. Rewards: {avg_score:3.2f}'
                  f'\t *** MODEL SAVED! ***')
        else:
            print(f'Episode:{i}'
                  f'\t ACC. Rewards: {score:3.2f}'
                  f'\t AVG. Rewards: {avg_score:3.2f}')

        episode_info = {
            'Episode': i,
            'Total Episodes': n_games,
            'Episodic Summed Rewards': score,
            'Moving Mean of Episodic Rewards': avg_score
        }
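
Example #2 feeds transitions to the agent through agent.memory.add(state, action, reward, next_state, done) and triggers updates with agent.optimize(). Below is a minimal sketch of a replay memory compatible with that add call, assuming a fixed capacity and uniform sampling; the sample helper and the capacity value are illustrative, not from the original.

import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=100_000):
        # A deque with maxlen silently evicts the oldest transition
        # once capacity is reached.
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random minibatch; zip(*batch) regroups the tuples
        # into per-field lists for batched tensor construction.
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = map(list, zip(*batch))
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.buffer)

agent.optimize() would then draw a minibatch via memory.sample(batch_size) and run one gradient step. Saving only when the 100-episode moving average improves, as the loop does, keeps the checkpoint at the best-performing policy rather than the most recent one.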