            # planning step: learn from a transition sampled from the environment model
            ms, ma = env_model.sample_s_a()        # ms here is a str
            mr, ms_ = env_model.get_r_s_(ms, ma)
            RL.learn(ms, ma, mr, str(ms_))
            # print(env_model.database)
            # print('################')
            # print(RL.q_table)
            # print('################')

            # move both agents to their next states
            s = s_
            s2 = s2_

            if done:
                s = env.reset()
                break
            if done2:
                s2 = env.reset2()
                break

    # end of game
    print('game over')
    print(RL.q_table)
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    env_model = EnvModel(actions=list(range(env.n_actions)))
    RL = Learning(actions=list(range(env.n_actions)))

    env.after(0, update)
    env.mainloop()
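# --- Not part of the original file: the EnvModel class used above is not shown in
# --- this section. Below is a minimal sketch of a tabular environment model
# --- consistent with the calls made here (sample_s_a, get_r_s_, database). The
# --- dict-based storage and the store_transition method name are assumptions.
import random


class EnvModel:
    """Remembers observed transitions so the agent can replay them (Dyna-Q style planning)."""

    def __init__(self, actions):
        self.actions = actions
        self.database = {}   # {(state_str, action): (reward, next_state)}

    def store_transition(self, s, a, r, s_):
        # record the latest outcome observed for this state-action pair
        self.database[(s, a)] = (r, s_)

    def sample_s_a(self):
        # sample a previously visited state-action pair at random
        s, a = random.choice(list(self.database.keys()))
        return s, a

    def get_r_s_(self, s, a):
        # return the stored reward and next state for that pair
        r, s_ = self.database[(s, a)]
        return r, s_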
from env import Maze
from q_learning import QLearning


def update():
    for episode in range(20):
        state = env.reset()
        step_count, done = 0, False
        while not done:
            env.render()
            action = agent.choose_action(str(state))
            state_, reward, done = env.step(action)
            step_count += 1
            agent.learn(str(state), action, reward, str(state_))
            state = state_
        print('Episode {0} finished, total steps: {1}'.format(episode + 1, step_count))


if __name__ == '__main__':
    env = Maze()
    agent = QLearning(actions=list(range(env.n_actions)))

    env.after(100, update)   # schedule update() to run once the Tk main loop starts
    env.mainloop()

    print('\nQ table:')
    print(agent.q_table)
    agent.q_table.to_csv('Q_Table.csv')
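# --- Not part of the original file: the q_learning module imported above is not
# --- included in this section. Below is a minimal sketch of a tabular Q-learning
# --- agent consistent with the calls used here (choose_action, learn, and a pandas
# --- q_table). The hyperparameter defaults, the check_state_exist helper, and the
# --- 'terminal' end-of-episode marker are assumptions.
import numpy as np
import pandas as pd


class QLearning:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions                  # list of action indices
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # add an all-zero row for states seen for the first time
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, state):
        self.check_state_exist(state)
        if np.random.uniform() < self.epsilon:
            # exploit: pick one of the best actions, breaking ties randomly
            state_actions = self.q_table.loc[state, :]
            action = np.random.choice(
                state_actions[state_actions == np.max(state_actions)].index)
        else:
            # explore: pick a random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':   # assumed end-of-episode marker
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            q_target = r       # terminal state has no future value
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)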
            # take the action and observe the next state and reward
            state_, reward, done = env.step(action)
            step_count += 1  # count steps

            # the agent learns from this transition (state, action, reward, state_)
            RL.learn(str(state), action, reward, str(state_))

            # move to the next state
            state = state_

            # the episode ends when the agent hits a bomb or finds the treasure
            if done:
                print("Episode {} finished. Total steps: {}\n".format(episode + 1, step_count))
                break

    # end of game: close the window
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # create the environment and the RL agent
    env = Maze()
    RL = QLearning(actions=list(range(env.n_actions)))

    # start visualising the environment
    env.after(100, update)
    env.mainloop()

    print('\nQ table:')
    print(RL.q_table)
            # start learning after 200 warm-up steps, then learn every 5 steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            # if done:
            #     break

            step += 1

    time.sleep(60)


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
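# --- Not part of the original file: the opening of run_maze is missing from this
# --- fragment. Below is a minimal sketch of a DQN training loop consistent with
# --- the learn() call and step counter above. The episode count and the
# --- choose_action / store_transition method names are assumptions.
def run_maze_sketch():
    step = 0
    for episode in range(300):
        observation = env.reset()
        while True:
            env.render()
            # choose an epsilon-greedy action from the current observation
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            # store the transition in the replay memory
            RL.store_transition(observation, action, reward, observation_)
            # start learning after 200 warm-up steps, then every 5 steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()
            observation = observation_
            if done:
                break
            step += 1
    print('game over')
    env.destroy()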