Exemplo n.º 1
0
        # One environment step of a training episode. The enclosing loop headers
        # and the initialisation of state/cum_reward are outside this excerpt.

        # Ask the agent for an action for the current state, with the training flag on.
        action = agent.act(state, is_training=True)

        # Apply the action to the environment; the fourth returned value is discarded.
        # NOTE(review): unpacking exactly four values matches the classic Gym
        # step() signature; newer Gymnasium releases return five - confirm the
        # environment version in use.
        next_state, reward, done, _ = env.step(action)
        # Reshape the new observation to shape [1, state_size].
        next_state = np.reshape(next_state, [1, state_size])

        # Hand the (state, action, reward) triple to the agent.
        # NOTE(review): next_state and done are not passed - presumably the agent
        # learns from whole-episode trajectories; confirm against agent.remember.
        agent.remember(state, action, reward)

        # Advance to the new state and add this step's reward to the episode total.
        state = next_state
        cum_reward += reward

        if done:
            # Episode finished: add its total reward to avg_score (the sum that
            # the periodic report divides by 100), then exit the enclosing loop.
            avg_score += cum_reward
            break

    # Run one agent update once the step loop above has ended; only element 0
    # of the value returned by agent.update() is used as the loss.
    current_loss = agent.update()[0]
    loss += current_loss

    # On every episode index that is a non-zero multiple of 100, print the
    # accumulated score and loss divided by 100, then reset both accumulators.
    # NOTE(review): if the episode counter starts at 0, the first report sums
    # 101 episodes (0..100) but still divides by 100 - confirm the loop range.
    # NOTE(review): avg_score is only increased when `done` is reached above; an
    # episode that ends any other way would add nothing to the average - verify.
    if episode % 100 == 0 and episode != 0:
        print("Episode: " + str(episode) + "/" + str(train_episodes) + ", score: " + str(avg_score/100) + ", Loss : " + str(loss/100))
        avg_score = 0
        loss = 0


# Evaluation phase: announce it, then iterate over a fixed number of test episodes.
# The rest of the loop body lies beyond this excerpt.
print("Testing...")

test_episodes = 100  # number of evaluation episodes to run
score = 0  # accumulator initialised here; its use is beyond the visible excerpt
for i in range(test_episodes):

    # Reset the environment to obtain the starting state for this test episode.
    state = env.reset()