# Training loop: run `train_episodes` episodes against `env`, store every
# transition in `agent`, call `agent.update()` once per episode, and print the
# mean score and mean loss every LOG_INTERVAL episodes.
train_episodes = 5000
MAX_STEPS_PER_EPISODE = 1000  # hard cap on environment steps in one episode
LOG_INTERVAL = 100  # number of episodes averaged into each progress line

avg_score = 0  # sum of episode returns in the current reporting window
loss = 0  # sum of per-episode losses in the current reporting window

for episode in range(train_episodes):
    state = env.reset()
    # Reshape the observation to a single-row batch of width `state_size`.
    state = np.reshape(state, [1, state_size])
    cum_reward = 0

    for _step in range(MAX_STEPS_PER_EPISODE):
        action = agent.act(state, is_training=True)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward)
        state = next_state
        cum_reward += reward
        if done:
            break

    # Count the return of every episode, including ones that hit the step cap
    # without `done`; otherwise those episodes are silently treated as
    # contributing nothing while still being divided into the average.
    avg_score += cum_reward

    # NOTE(review): the update runs once per episode, after the step loop, and
    # the first element of its return value is treated as the loss -- confirm
    # this matches the agent's contract.
    current_loss = agent.update()[0]
    loss += current_loss

    # Report after each full window of LOG_INTERVAL completed episodes so the
    # divisor always equals the number of episodes summed and the final window
    # is reported too.
    if (episode + 1) % LOG_INTERVAL == 0:
        # `!s` keeps the same str() rendering the concatenated message used.
        print(
            f"Episode: {episode + 1}/{train_episodes}"
            f", score: {avg_score / LOG_INTERVAL!s}"
            f", Loss : {loss / LOG_INTERVAL!s}"
        )
        avg_score = 0
        loss = 0