import pickle

import numpy as np
import matplotlib.pyplot as plt

RL = DuelingDQN(
    env.n_actions, env.n_features,   # attributes of the observation/state, e.g. length/width/height
    learning_rate=0.01,
    reward_decay=0.9,
    e_greedy=0.9,
    dueling=True,
    replace_target_iter=200,         # replace the target_net parameters every 200 steps
    memory_size=2000,                # replay-memory capacity
    # output_graph=True              # whether to write a TensorBoard file
)

env.after(100, run_maze)             # start the reinforcement-learning training
env.mainloop()

# plot the training-time curve
his_dueling = np.vstack((episodes, steps))
file = open('his_dueling', 'wb')
pickle.dump(his_dueling, file)
file.close()

plt.plot(his_dueling[0, :], his_dueling[1, :] - his_dueling[1, 0], c='b', label='Dueling DQN')
plt.legend(loc='best')               # 'loc' has several options; 'best' picks the legend position automatically
plt.ylabel('total training time')
plt.xlabel('episode')
plt.grid()                           # show grid lines (True = shown by default, False = hidden)
plt.show()

RL.plot_cost()                       # plot the neural network's loss curve
# RL.plot_q()                        # plot how the q_max value evolves
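Note that np.vstack((episodes, steps)) presupposes two equal-length sequences that were filled while run_maze was running; they are not created in this snippet. A minimal sketch of how they could be recorded (the list names match the plotting code, but where and what is appended is an assumption, not the author's exact code):

episodes, steps = [], []    # filled by run_maze, one entry per finished episode

# inside run_maze(), right after the inner while-loop for an episode ends:
#     episodes.append(episode)   # episode index -> x-axis of the curve
#     steps.append(step)         # cumulative step count -> proxy for total training time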
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DuelingDQN(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
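For reference, what distinguishes DuelingDQN from a plain DQN is the output head of the network: the last shared hidden layer is split into a state-value stream V(s) and an advantage stream A(s, a), which are recombined as Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). The sketch below shows such a head in TF 1.x style; the function name, layer sizes, and variable names are illustrative assumptions, not the class's actual implementation:

import tensorflow as tf

def dueling_head(l1, n_l1, n_actions, c_names, w_initializer, b_initializer):
    # a minimal sketch, assuming TF 1.x; `l1` is the last shared hidden layer
    with tf.variable_scope('Value'):        # state-value stream V(s)
        w2 = tf.get_variable('w2', [n_l1, 1], initializer=w_initializer, collections=c_names)
        b2 = tf.get_variable('b2', [1, 1], initializer=b_initializer, collections=c_names)
        V = tf.matmul(l1, w2) + b2

    with tf.variable_scope('Advantage'):    # advantage stream A(s, a)
        w2 = tf.get_variable('w2', [n_l1, n_actions], initializer=w_initializer, collections=c_names)
        b2 = tf.get_variable('b2', [1, n_actions], initializer=b_initializer, collections=c_names)
        A = tf.matmul(l1, w2) + b2

    with tf.variable_scope('Q'):
        # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a));
        # subtracting the mean keeps V and A identifiable and stabilizes training
        out = V + (A - tf.reduce_mean(A, axis=1, keep_dims=True))
    return out

With dueling=True the class presumably builds a head like this for both eval_net and target_net; with the flag off it would fall back to a single fully connected Q-output layer.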