def run_training(maze_size=(6, 6), trap_number=1, epoch=20, epsilon0=0.3, alpha=0.3, gamma=0.9):
    # Optional parameters:
    # epoch = 20          # number of training epochs
    # epsilon0 = 0.3      # initial probability of random exploration
    # alpha = 0.3         # learning rate
    # gamma = 0.9         # discount factor
    # maze_size = (6, 6)  # maze size
    # trap_number = 1     # number of traps
    g = Maze(maze_size=maze_size, trap_number=trap_number)
    r = Robot(g, alpha=alpha, epsilon0=epsilon0, gamma=gamma)
    r.set_status(learning=True)
    runner = Runner(r, g)
    runner.run_training(epoch, display_direction=True)
    # You can leave the next line commented out to speed up the run, but then you won't get the video.
    # runner.generate_movie(filename="final1.mp4")
    # runner.plot_results()
    return runner
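# Example usage of run_training (a minimal sketch; the argument values below are
# illustrative assumptions, not tuned settings). The returned Runner can still be
# used for plotting afterwards.
runner = run_training(maze_size=(8, 8), trap_number=2, epoch=30, epsilon0=0.5)
runner.plot_results()  # plot training statistics from the returned runner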
def test_different_parameter(alpha_test, gamma_test, epsilon_test, epoch_test, maze_size=(6, 6), trap_number=1):
    # maze_size and trap_number were previously read from module-level globals;
    # they are now explicit parameters with the same defaults as run_training.
    g = Maze(maze_size=maze_size, trap_number=trap_number)
    r = Robot(g, alpha=alpha_test, epsilon0=epsilon_test, gamma=gamma_test)
    r.set_status(learning=True)
    runner = Runner(r, g)
    runner.run_training(epoch_test, display_direction=True)
    print("alpha: {}, gamma: {}, epsilon: {}, epoch: {}".format(
        alpha_test, gamma_test, epsilon_test, epoch_test))
    runner.plot_results()
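# A sketch of a small grid search built on top of test_different_parameter
# (the value grids below are illustrative assumptions, not recommendations).
from itertools import product

alphas = [0.1, 0.5]
gammas = [0.9, 0.94]
epsilons = [0.3, 0.7]
for a, gm, e in product(alphas, gammas, epsilons):
    test_different_parameter(a, gm, e, epoch_test=20)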
def train_by_dqn_robot(times, maze_size=5):
    print("start times:", times)
    maze = Maze(maze_size=maze_size)

    """choose Keras or Torch version"""
    robot = KerasRobot(maze=maze)
    # robot = TorchRobot(maze=maze)
    robot.memory.build_full_view(maze=maze)

    """training by runner"""
    runner = Runner(robot=robot)
    runner.run_training(15, 75)

    """Test Robot"""
    robot.reset()
    for _ in range(25):
        a, r = robot.test_update()
        # Under the min-value reward convention used by this project, a strongly
        # negative reward signals that the destination was reached.
        if r < -20:
            print("SUCCESSFUL!", "| TIMES:", times)
            break
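# Example usage (illustrative): DQN runs can differ between random
# initializations, so retry training a few times and watch for "SUCCESSFUL!".
for t in range(5):
    train_by_dqn_robot(t, maze_size=5)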
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 14:12:30 2018

@author: htaiwan
"""

from Maze import Maze
from Robot import Robot
from Runner import Runner

epoch = 20
epsilon0 = 0.7
alpha = 0.5
gamma = 0.9
maze_size = (6, 6)
trap_number = 1

maze = Maze(maze_size=maze_size, trap_number=trap_number)
robot = Robot(maze, alpha=alpha, epsilon0=epsilon0, gamma=gamma)
robot.set_status(learning=True)

runner = Runner(robot, maze)
runner.run_training(epoch, display_direction=True)
# You can leave this line commented out to speed up the run, but then you won't be able to watch the video.
# runner.generate_movie(filename="final.avi")
runner.plot_results()
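# A follow-up sketch using the same API as the script above (the epsilon0 value
# is an illustrative assumption): train a second robot with less initial
# exploration on the same maze, to compare convergence against epsilon0 = 0.7.
greedy_robot = Robot(maze, alpha=alpha, epsilon0=0.1, gamma=gamma)
greedy_robot.set_status(learning=True)
greedy_runner = Runner(greedy_robot, maze)
greedy_runner.run_training(epoch, display_direction=True)
greedy_runner.plot_results()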
print("the returned reward: ", action) ''' """ Qlearning 算法相关参数: """ epoch = 20 # 训练轮数 epsilon0 = 1 # 初始探索概率 alpha = 0.5 # 公式中的 ⍺ gamma = 0.94 # 公式中的 γ maze_size = 11 # 迷宫size """ 使用 QLearning 算法训练过程 """ g = Maze(maze_size=maze_size) r = QRobot(g, alpha=alpha, epsilon0=epsilon0, gamma=gamma) runner = Runner(r) runner.run_training(epoch, training_per_epoch=int(maze_size * maze_size * 1.5)) # 生成训练过程的gif图, 建议下载到本地查看;也可以注释该行代码,加快运行速度。 # runner.generate_gif(filename="results/size5.gif") runner.plot_results() # 输出训练结果,可根据该结果对您的机器人进行分析。 ''' test_memory = ReplayDataSet(max_size=1e3) # 初始化并设定最大容量 actions = ['u', 'r', 'd', 'l'] test_memory.add((0,1), actions.index("r"), -10, (0,1), 1) # 添加一条数据(state, action_index, reward, next_state) print(test_memory.random_sample(1)) # 从中随机抽取一条(因为只有一条数据) ''' ''' os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # 允许重复载入lib文件 maze = Maze(maze_size=5)
from torch_py.MinDQNRobot import MinDQNRobot as Robot  # PyTorch version

"""
Deep Q-Learning algorithm parameters:
"""
epoch = 20     # number of training epochs
maze_size = 5  # maze size
training_per_epoch = int(maze_size * maze_size * 2)

"""
Training with the DQN algorithm
"""
maze = Maze(maze_size=maze_size)
robot = Robot(maze)
print(robot.maze.reward)  # print the reward values used by the min-value selection strategy

"""Cheat mode: give the robot a full view of the maze."""
robot.memory.build_full_view(maze=maze)
runner = Runner(robot)
runner.run_training(epoch, training_per_epoch)
runner.plot_results()

# """Test Robot"""
# robot.reset()
# for _ in range(25):
#     a, r = robot.test_update()
#     print("action:", a, "reward:", r)
#     if r == maze.reward["destination"]:
#         print("success")
#         break

# Generate a gif of the training process (best downloaded and viewed locally);
# you can also comment this line out to speed up the run.
# runner.generate_gif(filename="results/dqn_size10.gif")

# %%
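# A sketch of scaling the same DQN setup to larger mazes (the sizes below are
# illustrative); training_per_epoch grows with the maze area, as above.
for size in (5, 7, 9):
    m = Maze(maze_size=size)
    rb = Robot(m)
    rb.memory.build_full_view(maze=m)
    size_runner = Runner(rb)
    size_runner.run_training(epoch, int(size * size * 2))
    size_runner.plot_results()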