import _util as u

# Enable colored output in the command window.
# NOTE(review): `cr` is bound outside this block -- presumably colorama; confirm.
cr.init(autoreset=True)

# Keyboard-driven play loop: the most recently accepted arrow key is replayed
# as the action on every step, until 5 episodes have finished or 'q' is hit.
env = gym.make('CartPole-v0')
# Alternative environment: env = gym.make('FrozenLake-v0')
env.reset()

random_episodes = 0  # number of episodes finished so far
reward_sum = 0       # reward accumulated in the current episode
key = b'_K'          # key whose action is replayed until a new valid key arrives

try:
    while random_episodes < 5:
        env.render()

        if u.kbhit():
            pressed = u.inkey()
            if pressed == b'q':
                print("Game aborted!")
                break
            # The key map produces actions 0..3, but the active environment
            # may accept fewer (CartPole-v0 only takes 0 and 1). Accept the
            # key only when it is mapped AND its action is valid for this
            # environment; otherwise keep replaying the previous key instead
            # of crashing with KeyError or an invalid-action error.
            try:
                candidate = u.arrow_keys[pressed]
            except KeyError:
                candidate = None
            if candidate is not None and env.action_space.contains(candidate):
                key = pressed

        action = u.arrow_keys[key]  # 0-Left, 1-Down, 2-Right, 3-Up
        # To play randomly instead: action = env.action_space.sample()
        print(action)

        observation, reward, done, info = env.step(action)
        print(observation, reward, done, action)
        reward_sum += reward

        if done:
            random_episodes += 1
            print("Reward for this episode was:", random_episodes, reward_sum)
            reward_sum = 0
            env.reset()
finally:
    # Release the render window even when the loop is aborted or raises.
    env.close()
# NOTE(review): this line is a whitespace-collapsed fragment. Its original line
# breaks and indentation are lost, and it relies on names bound outside the
# visible chunk (i, cost_sum, count, rList, num_episodes, sess, m, env, np,
# plt, u). The first `break` implies the leading statements belong to a loop
# whose header is not visible here -- restore the layout against the original
# file before running.
#
# In order, the statements:
#   1. When `i` is a multiple of 100, print `i` and the ratio cost_sum / count.
#   2. Break out of the enclosing loop once rList holds more than 10 entries
#      and the mean of its last 10 entries exceeds 500.
#   3. Print sum(rList) / num_episodes labelled "Success rate" with a "%"
#      suffix. NOTE(review): the quotient is not scaled by 100, and after an
#      early break rList presumably holds fewer than num_episodes entries --
#      confirm the intended denominator.
#   4. Reset the environment and play one episode: each step reshapes the
#      observation to [1, m.input_size], evaluates m.Y_ through sess.run,
#      takes the argmax as the action, and accumulates the reward; when `done`
#      is set, the total score is printed and the loop ends.
#   5. Draw rList as a blue bar chart, call u.inkey() (presumably waits for a
#      key press -- confirm in _util), then close the environment.
if i % 100 == 0: print('num_episodes = {:4d}, cost = {:7.5f} '.format( i, cost_sum / count)) if len(rList) > 10 and np.mean(rList[-10:]) > 500: break print("Success rate: " + str(sum(rList) / num_episodes) + "%") observation = env.reset() reward_sum = 0 while True: env.render() observation = np.reshape(observation, [1, m.input_size]) Q_pred = sess.run(m.Y_, feed_dict={m.X: observation}) action = np.argmax(Q_pred) observation, reward, done, _ = env.step(action) reward_sum += reward if done: print("Total score: {}".format(reward_sum)) break plt.bar(range(len(rList)), rList, color="blue") plt.show() k = u.inkey() env.close()