# Example no. 1
0
# Length of one observation vector. Every state is reshaped to
# (1, state_size) before it is passed to the agent.
state_size = 8

# NOTE(review): `action_size`, `env`, `np` and `PolicyGradientAgent` are not
# defined in this section -- presumably they are set up earlier in the file.
agent = PolicyGradientAgent(state_size, action_size)

print("Training...")
train_episodes = 5000
avg_score = 0  # running sum of per-episode returns (not yet divided)
loss = 0       # running sum of the first value returned by agent.update()
for episode in range(train_episodes):

    # NOTE(review): assumes env.reset() returns only the observation and
    # env.step() returns a 4-tuple (classic Gym API) -- confirm.
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    cum_reward = 0
    # An episode is cut off after at most 1000 steps.
    for i in range(1000):

        action = agent.act(state, is_training=True)

        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        # Only (state, action, reward) is stored; next_state is not.
        agent.remember(state, action, reward)

        state = next_state
        cum_reward += reward

        if done:
            break

    # Accumulate the return after the step loop rather than inside
    # `if done:` so that episodes which hit the 1000-step cap without the
    # environment signalling `done` are still counted in the score.
    avg_score += cum_reward

    # One update per episode; the first element of the result is treated as
    # the loss. NOTE(review): exact return shape of update() is not visible.
    current_loss = agent.update()[0]
    loss += current_loss