# Example #1
0
# Train the agent for a fixed number of episodes. After every
# `report_interval` completed episodes, print the mean episode return and the
# mean update loss over that window, then reset both accumulators.
train_episodes = 5000
report_interval = 100  # number of episodes averaged per printed report
max_steps = 1000  # upper bound on environment steps within one episode
avg_score = 0  # sum of episode returns in the current reporting window
loss = 0  # sum of per-episode update losses in the current reporting window
for episode in range(train_episodes):

    state = env.reset()
    # Reshape the observation to a single-row batch of width `state_size`.
    state = np.reshape(state, [1, state_size])
    cum_reward = 0
    for _ in range(max_steps):

        action = agent.act(state, is_training=True)

        next_state, reward, done, _info = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        # Only (state, action, reward) is stored; next_state is not passed on.
        agent.remember(state, action, reward)

        state = next_state
        cum_reward += reward

        if done:
            break

    # Accumulate the return for every episode, including those that hit the
    # step cap without `done`; otherwise such episodes would be divided into
    # the average while contributing nothing to the sum.
    avg_score += cum_reward

    # One update per episode; the first element of the result is used as the
    # loss (presumably a scalar -- TODO confirm against agent.update()).
    current_loss = agent.update()[0]
    loss += current_loss

    # Report after each full window of `report_interval` episodes. Using
    # `episode + 1` keeps every window exactly `report_interval` long (the
    # first one used to span 101 episodes) and ensures the final window of
    # the run is printed as well.
    if (episode + 1) % report_interval == 0:
        print(
            "Episode: " + str(episode + 1) + "/" + str(train_episodes)
            + ", score: " + str(avg_score / report_interval)
            + ", Loss : " + str(loss / report_interval)
        )
        avg_score = 0
        loss = 0