    RL = DuelingDQN(
        env.n_actions,
        env.n_features,  # number of observation/state features, e.g. length/width/height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        dueling=True,
        replace_target_iter=200,  # replace target_net parameters every 200 learning steps
        memory_size=2000,  # replay memory capacity
        # output_graph=True   # whether to write a TensorBoard graph file
    )
    env.after(100, run_maze)  # start the reinforcement learning training loop
    env.mainloop()
    # save and plot the training-time curve
    his_dueling = np.vstack((episodes, steps))
    file = open('his_dueling', 'wb')
    pickle.dump(his_dueling, file)
    file.close()

    plt.plot(his_dueling[0, :],
             his_dueling[1, :] - his_dueling[1, 0],
             c='b',
             label='Dueling DQN')
    plt.legend(loc='best')  # 'loc' accepts several positions; 'best' picks one automatically
    plt.ylabel('total training time')
    plt.xlabel('episode')
    plt.grid()  # show grid lines
    plt.show()
    RL.plot_cost()  # plot the neural network's loss (cost) curve
    # RL.plot_q()   # plot the q_max curve over training
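
Example #1 is an excerpt: it assumes pickle, numpy as np, and matplotlib.pyplot as plt were imported earlier, and that run_maze recorded one entry per episode into episodes and steps (the cumulative step count, used here as a proxy for training time). A minimal sketch of reading the saved history back for a later comparison plot; only the file name 'his_dueling' comes from the snippet, the rest is illustrative:

import pickle
import matplotlib.pyplot as plt

# Reload the (2, n_episodes) history array saved by the training run above:
# row 0 = episode index, row 1 = cumulative step count at the end of that episode.
with open('his_dueling', 'rb') as f:
    his_dueling = pickle.load(f)

plt.plot(his_dueling[0, :],
         his_dueling[1, :] - his_dueling[1, 0],   # steps elapsed since the first recorded episode
         c='b', label='Dueling DQN')
plt.legend(loc='best')
plt.ylabel('total training time')
plt.xlabel('episode')
plt.grid()
plt.show()
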
Example #2
                RL.learn()  # sample from replay memory and update the eval network

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DuelingDQN(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
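
Example #2 is also truncated: the lines above pick up run_maze mid-loop, after the action has already been chosen and stored. A minimal sketch of the assumed full loop in the style of this tutorial; the Maze/DuelingDQN methods used here (env.reset(), env.render(), env.step(), RL.choose_action(), RL.store_transition()) and the warm-up thresholds are assumptions, not confirmed by the snippet:

def run_maze():
    step = 0
    for episode in range(300):                        # hypothetical episode count
        observation = env.reset()                     # initial observation for this episode
        while True:
            env.render()                              # refresh the maze window
            action = RL.choose_action(observation)    # epsilon-greedy action selection
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):      # learn after a warm-up period (thresholds are illustrative)
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()
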