Exemplo n.º 1
0
def run_training(maze_size=(6, 6),
                 trap_number=1,
                 epoch=20,
                 epsilon0=0.3,
                 alpha=0.3,
                 gamma=0.9):
    """Train a Q-learning Robot on a randomly generated Maze.

    Parameters
    ----------
    maze_size : tuple
        Grid dimensions of the maze, e.g. ``(6, 6)``.
    trap_number : int
        Number of traps placed in the maze.
    epoch : int
        Number of training epochs.
    epsilon0 : float
        Initial random-exploration probability.
    alpha : float
        Learning rate used in the Q-learning update.
    gamma : float
        Discount factor used in the Q-learning update.

    Returns
    -------
    Runner
        The runner holding the trained robot and its training statistics.
    """
    g = Maze(maze_size=maze_size, trap_number=trap_number)
    r = Robot(g, alpha=alpha, epsilon0=epsilon0, gamma=gamma)
    r.set_status(learning=True)

    runner = Runner(r, g)
    runner.run_training(epoch, display_direction=True)

    # Optional visualisations (slow — enable when you want to inspect the run):
    # runner.generate_movie(filename="final1.mp4")
    # runner.plot_results()
    return runner
Exemplo n.º 2
0
def test_different_parameter(alpha_test, gamma_test, epsilon_test, epoch_test,
                             maze_size=None, trap_number=None):
    """Train a robot with the given hyper-parameters and plot the results.

    Parameters
    ----------
    alpha_test : float
        Learning rate to test.
    gamma_test : float
        Discount factor to test.
    epsilon_test : float
        Initial exploration probability to test.
    epoch_test : int
        Number of training epochs.
    maze_size, trap_number : optional
        Maze configuration. When omitted they fall back to the module-level
        globals of the same name, which is what the original code relied on
        implicitly (and crashed with NameError when they were absent).
    """
    # Backward-compatible fallback to the module-level configuration.
    if maze_size is None:
        maze_size = globals()["maze_size"]
    if trap_number is None:
        trap_number = globals()["trap_number"]

    g = Maze(maze_size=maze_size, trap_number=trap_number)
    r = Robot(g, alpha=alpha_test, epsilon0=epsilon_test, gamma=gamma_test)
    r.set_status(learning=True)

    runner = Runner(r, g)
    runner.run_training(epoch_test, display_direction=True)
    print("alpha: {}, gamma: {}, epsilon: {}, epoch: {}".format(
        alpha_test, gamma_test, epsilon_test, epoch_test))
    runner.plot_results()
Exemplo n.º 3
0
def train_by_dqn_robot(times, maze_size=5):
    """Train a DQN robot on a fresh maze and report when its test run succeeds.

    Parameters
    ----------
    times : int
        Identifier for this training attempt, echoed in the output.
    maze_size : int
        Side length of the generated maze.
    """
    print("start times:", times)

    maze = Maze(maze_size=maze_size)

    # Pick the Keras or the Torch implementation of the robot.
    robot = KerasRobot(maze=maze)
    # robot = TorchRobot(maze=maze)

    # Pre-load the replay memory with a full view of the maze.
    robot.memory.build_full_view(maze=maze)

    # Run the training loop.
    runner = Runner(robot=robot)
    runner.run_training(15, 75)

    # Evaluate the trained robot for at most 25 steps.
    robot.reset()
    for _ in range(25):
        action, reward = robot.test_update()
        # NOTE(review): success is detected via a strongly negative reward —
        # consistent with this project's minimum-value reward policy; confirm
        # against the robot implementation.
        if reward >= -20:
            continue
        print(
            "SUCCESSFUL!",
            "| TIMES:",
            times,
        )
        break
Exemplo n.º 4
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 14:12:30 2018

@author: htaiwan
"""

from Maze import Maze
from Robot import Robot
from Runner import Runner

# Q-learning hyper-parameters.
epoch = 20        # number of training epochs
epsilon0 = 0.7    # initial exploration probability
alpha = 0.5       # learning rate
gamma = 0.9       # discount factor

# Maze configuration.
maze_size = (6,6)
trap_number = 1

# Build the maze and the learning robot.
maze = Maze(maze_size=maze_size, trap_number=trap_number)
robot = Robot(maze, alpha=alpha, epsilon0=epsilon0, gamma=gamma)
robot.set_status(learning=True)

# Train, then plot the training results.
runner = Runner(robot, maze)
runner.run_training(epoch, display_direction=True)
# Optional but slow: render a movie of the training run.
#runner.generate_movie(filename = "final.avi")

runner.plot_results()
Exemplo n.º 5
0
print("the returned reward: ", action)
'''
"""  Qlearning 算法相关参数: """

epoch = 20  # 训练轮数
epsilon0 = 1  # 初始探索概率
alpha = 0.5  # 公式中的 ⍺
gamma = 0.94  # 公式中的 γ
maze_size = 11  # 迷宫size
""" 使用 QLearning 算法训练过程 """

g = Maze(maze_size=maze_size)
r = QRobot(g, alpha=alpha, epsilon0=epsilon0, gamma=gamma)

runner = Runner(r)
runner.run_training(epoch, training_per_epoch=int(maze_size * maze_size * 1.5))

# 生成训练过程的gif图, 建议下载到本地查看;也可以注释该行代码,加快运行速度。
# runner.generate_gif(filename="results/size5.gif")

runner.plot_results()  # 输出训练结果,可根据该结果对您的机器人进行分析。
'''
test_memory = ReplayDataSet(max_size=1e3) # 初始化并设定最大容量
actions = ['u', 'r', 'd', 'l']
test_memory.add((0,1), actions.index("r"), -10, (0,1), 1)  # 添加一条数据(state, action_index, reward, next_state)
print(test_memory.random_sample(1)) # 从中随机抽取一条(因为只有一条数据)
'''
'''
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # 允许重复载入lib文件

maze = Maze(maze_size=5)
Exemplo n.º 6
0
from torch_py.MinDQNRobot import MinDQNRobot as Robot  # PyTorch version

# Deep Q-learning hyper-parameters.
epoch = 20      # number of training epochs
maze_size = 5   # maze size
training_per_epoch = int(maze_size * maze_size * 2)

# Train with the DQN algorithm.
maze = Maze(maze_size=maze_size)
robot = Robot(maze)

# Print the reward table of the minimum-value selection policy.
print(robot.maze.reward)

# "Golden finger": give the robot full visibility of the maze up front.
robot.memory.build_full_view(maze=maze)
runner = Runner(robot)
runner.run_training(epoch, training_per_epoch)
runner.plot_results()

# Optional evaluation of the trained robot:
# robot.reset()
# for _ in range(25):
#     a, r = robot.test_update()
#     print("action:", a, "reward:", r)
#     if r == maze.reward["destination"]:
#         print("success")
#         break

# Optional, slow: generate a GIF of the training process
# (download it locally to view).
# runner.generate_gif(filename="results/dqn_size10.gif")

# %%