Example #1
    def __init__(self, log_name='MazeEnv', maze=None):
        if maze is None:
            self.maze = Maze.build(bounds=(50, 50))
        else:
            self.maze = Maze(bounds=(maze.max_x, maze.max_y),
                             target=maze.target)

        # if MazeEnv.logger is None:
        #     MazeEnv.logger = Logger("MazeEnv")
        # self.logger = Logger(log_name, show_in_console=False)
        self.viewer = None
Example #2
    def __init__(self, log_name='MazeEnv', maze=None):
        if maze is None:
            self.maze = Maze.build(bounds=(10, 10), block_cnt=20)
        else:
            self.maze = Maze(
                start=(maze.x, maze.y),
                bounds=(maze.max_x, maze.max_y),
                door=maze.door,
                blocks=maze.blocks)

        # if MazeEnv.logger is None:
        #     MazeEnv.logger = Logger("MazeEnv")
        # self.logger = Logger(log_name, show_in_console=False)
        self.viewer = None
Example #3
    def reset(self):
        x = random.randint(0, self.maze.max_x - 1)
        y = random.randint(0, self.maze.max_y - 1)

        self.maze = Maze.build(bounds=(self.maze.max_x, self.maze.max_y))

        if self.viewer is not None:
            self.viewer.set_maze(self.maze)

        return self.get_state()
Example #4
def main():
    global env, RL
    env = Maze('./env/maps/map3.json', full_observation=True)
    RL = DeepQNetwork(
        n_actions=4,
        n_features=env.height * env.width,
        restore_path=None,
        # restore_path=base_path + 'model_dqn.ckpt',
        learning_rate=0.00001,
        reward_decay=0.9,
        e_greedy=0.95,
        replace_target_iter=4e4,
        batch_size=64,
        e_greedy_init=0,
        # e_greedy_increment=None,
        e_greedy_increment=1e-3,
        output_graph=False,
    )
    env.after(100, run_maze)
    env.mainloop()
Example #5
                      total_step)
            except Exception as e:
                print(e)


if __name__ == "__main__":
    GLOBAL_NET_SCOPE = 'Global_Net'
    N_S = MazeEnv.state_space_dim
    N_A = MazeEnv.action_dim

    SESS = tf.Session()

    with tf.device("/cpu:0"):
        # OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA')
        # OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC')
        global_maze = Maze.build(bounds=(80, 80))
        # sess, name, N_S, N_A, globalAC, maze=None
        GLOBAL_AC = A3CNet(SESS, GLOBAL_NET_SCOPE, N_S,
                           N_A)  # we only need its params
        workers = []

        # Create worker
        for i in range(N_WORKERS):
            i_name = 'W_%i' % i  # worker name
            workers.append(
                Worker(SESS, i_name, N_S, N_A, GLOBAL_AC, global_maze))

    COORD = tf.train.Coordinator()
    SESS.run(tf.global_variables_initializer())

    worker_threads = []
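
The excerpt above ends right after the coordinator is created, while `worker_threads` is still empty. A minimal sketch of how such A3C workers are typically launched and joined might look like the following; it assumes each `Worker` exposes a `work()` method that runs its training loop, which is not shown in this excerpt.

    import threading  # assumed to be imported at the top of the file

    # Start one thread per worker and let the coordinator wait for all of them to finish.
    for worker in workers:
        t = threading.Thread(target=worker.work)  # assumes Worker.work() is the training-loop entry point
        t.start()
        worker_threads.append(t)
    COORD.join(worker_threads)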
Example #6
class MazeEnv(object):
    """
    Simulated maze environment.
    """
    action_dim = 4
    state_space_dim = 2

    def __init__(self, log_name='MazeEnv', maze=None):
        if maze is None:
            self.maze = Maze.build(bounds=(10, 10), block_cnt=20)
        else:
            self.maze = Maze(
                start=(maze.x, maze.y),
                bounds=(maze.max_x, maze.max_y),
                door=maze.door,
                blocks=maze.blocks)

        # if MazeEnv.logger is None:
        #     MazeEnv.logger = Logger("MazeEnv")
        # self.logger = Logger(log_name, show_in_console=False)
        self.viewer = None
        # self.queue = Queue()

    def reset(self):
        x = random.randint(0, self.maze.max_x - 1)
        y = random.randint(0, self.maze.max_y - 1)
        # x, y = 0, 0
        self.maze.set_start((x, y))
        # self.maze = Maze.build(bounds=(20, 20), block_cnt=100)

        if self.viewer is not None:
            self.viewer.maze = self.maze
        return self.get_state()

    # def clear_queue(self):
    #     while not self.queue.empty():
    #         self.queue.get()

    def step(self, a):
        """
        Transition the state according to action a and return the new state (s), reward, done.
        a: 0=right, 1=left, 2=down, 3=up (matching the move_* calls below)
        """
        # s = self.get_state()
        succ = False

        if a == 3:
            succ = self.maze.move_up()
        if a == 2:
            succ = self.maze.move_down()
        if a == 1:
            succ = self.maze.move_left()
        if a == 0:
            succ = self.maze.move_right()

        r = 0  # -1 per step until the door, i.e. the policy should escape in the fewest steps
        if not succ:  # penalize invalid moves such as walking into a wall
            r = 0
        done = False
        if self.maze.done():
            done = True
            r = 10
        # self.logger.debug([s, a, self.get_state(), r, done])
        # if self.viewer is not None:
        #     self.viewer.maze.set_start(start=(self.maze.x, self.maze.y))

        return self.get_state(), r, done

    def render(self):
        if self.viewer is None:
            self.viewer = MazeViewer(self.maze)
        self.viewer.render()

    def get_state(self):
        return np.hstack([self.maze.x, self.maze.y])
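
The environment above exposes a small Gym-like interface (reset, step, render, get_state). A minimal random-agent rollout, written only against the methods shown in this example, might look like:

import random

env = MazeEnv()
s = env.reset()
for t in range(500):  # cap the episode length for the demo
    a = random.randint(0, MazeEnv.action_dim - 1)  # random action in {0, 1, 2, 3}
    s, r, done = env.step(a)
    env.render()
    if done:
        break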
Example #7
def main():
    global env
    env = Maze('./env/maps/map3.json')
    env.after(100, run_maze)
    env.mainloop()  # mainloop() to run the application.
Example #8
        arrow = '←'
    if action == 3:
        arrow = '→'

    return arrow


# function phi(): used for image preprocessing. Here it is effectively an identity function.
def phi(observation):
    return observation


if __name__ == "__main__":
    # get the maze environment
    env = Maze()
    # get the DeepQNetwork Agent
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      e_greedy_increment=0.01,
                      output_graph=True,
                      )
    # Calculate running time
    start_time = time.time()

    run_maze()
Example #9
def main():
    global env, RL, env_model

    # if_dyna = True
    # env = Maze('./env/maps/map2.json')
    # if if_dyna:
    #     # ---------- Dyna Q ---------- # #
    #     from brain.dyna_Q import QLearningTable, EnvModel
    #     RL = QLearningTable(actions=list(range(env.n_actions)))
    #     env_model = EnvModel(actions=list(range(env.n_actions)))
    #     env.after(0, update_dyna_q)  # Call function update() once after given time/ms.
    # else:
    #     # # -------- Q Learning -------- # #
    #     from brain.Q_learning import QLearningTable
    #     RL = QLearningTable(actions=list(range(env.n_actions)))
    #     env.after(0, update_q())  # Call function update() once after given time/ms.

    time_cmp = []
    # -------- Q Learning -------- # #
    from brain.Q_learning import QLearningTable
    start = time.time()
    env = Maze('./env/maps/map2.json')
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(0, update_q)  # Schedule update_q() once after the given delay in ms.
    env.mainloop()
    sum_time = time.time() - start
    time_cmp.append(sum_time)
    # ---------- Dyna Q ---------- # #
    from brain.dyna_Q import QLearningTable, EnvModel
    for n in [5, 10, 25, 50]:
        start = time.time()
        env = Maze('./env/maps/map2.json')
        RL = QLearningTable(actions=list(range(env.n_actions)))
        env_model = EnvModel(actions=list(range(env.n_actions)))
        print('Dyna-{}'.format(n))
        env.after(0, update_dyna_q,
                  n)  # n is the parameter of update_dyna_q().
        env.mainloop()  # mainloop() to run the application.
        sum_time = time.time() - start
        time_cmp.append(sum_time)

    # This part must come after env.mainloop()
    # plot all lines.
    all_aver_steps = [np.load('./logs/q_learning/q_learning.npy').tolist()]
    for n in [5, 10, 25, 50]:
        all_aver_steps.append(
            np.load('./logs/dyna_q/dyna_q_{}.npy'.format(n)).tolist())
    plot_multi_lines(
        all_aver_steps,
        all_labels=['q_learning', 'dyna_5', 'dyna_10', 'dyna_25', 'dyna_50'],
        save_path='./logs/cmp_all.png')

    # only plot dyna_Q
    all_aver_steps = []
    for n in [5, 10, 25, 50]:
        all_aver_steps.append(
            np.load('./logs/dyna_q/dyna_q_{}.npy'.format(n))[0:100].tolist())
    plot_multi_lines(all_aver_steps,
                     all_labels=['dyna_5', 'dyna_10', 'dyna_25', 'dyna_50'],
                     save_path='./logs/cmp_all_dyna_Q.png')

    print(time_cmp)
Example #10
File: rl.py  Project: LDNN97/RLES
    #         action = rl.choose_action(str(observation))
    #         observation_, reward, done = env.step(action)
    #         rl.learn(str(observation), action, reward, str(observation_))
    #         observation = observation_
    #         if done:
    #             break

    # on policy
    rl = SarsaTable(actions=list(range(env.n_actions)))
    for episode in range(100):
        observation = env.reset()
        action = rl.choose_action(str(observation))
        while True:
            observation_, reward, done = env.step(action)
            action_ = rl.choose_action(str(observation_))
            rl.learn(str(observation), action, reward, str(observation_),
                     action_)
            observation = observation_
            action = action_
            if done:
                break

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    env.after(1000, main)
    env.mainloop()
Example #11
                      total_step)
            except Exception as e:
                print(e)


if __name__ == "__main__":
    GLOBAL_NET_SCOPE = 'Global_Net'
    N_S = MazeEnv.state_space_dim
    N_A = MazeEnv.action_dim

    SESS = tf.Session()

    with tf.device("/cpu:0"):
        # OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA')
        # OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC')
        global_maze = Maze.build(bounds=(30, 30), block_cnt=200)
        # sess, name, N_S, N_A, globalAC, maze=None
        GLOBAL_AC = A3CNet(SESS, GLOBAL_NET_SCOPE, N_S,
                           N_A)  # we only need its params
        workers = []

        # Create worker
        for i in range(N_WORKERS):
            i_name = 'W_%i' % i  # worker name
            workers.append(
                Worker(SESS, i_name, N_S, N_A, GLOBAL_AC, global_maze))

    COORD = tf.train.Coordinator()
    SESS.run(tf.global_variables_initializer())

    worker_threads = []
Example #12
class MazeEnv(object):
    """
    Simulated maze environment.
    """
    action_dim = 4
    state_space_dim = 5

    def __init__(self, log_name='MazeEnv', maze=None):
        if maze is None:
            self.maze = Maze.build(bounds=(50, 50))
        else:
            self.maze = Maze(bounds=(maze.max_x, maze.max_y),
                             target=maze.target)

        # if MazeEnv.logger is None:
        #     MazeEnv.logger = Logger("MazeEnv")
        # self.logger = Logger(log_name, show_in_console=False)
        self.viewer = None
        # self.queue = Queue()

    def reset(self):
        x = random.randint(0, self.maze.max_x - 1)
        y = random.randint(0, self.maze.max_y - 1)

        self.maze = Maze.build(bounds=(self.maze.max_x, self.maze.max_y))

        if self.viewer is not None:
            self.viewer.set_maze(self.maze)

        return self.get_state()

    def step(self, a):
        """
        Transition the state according to action a and return the new state (s), reward, done.
        a: 0=right, 1=left, 2=down, 3=up (matching the move_* calls below)
        """
        # s = self.get_state()

        if a == 3:
            self.maze.move_up()
        if a == 2:
            self.maze.move_down()
        if a == 1:
            self.maze.move_left()
        if a == 0:
            self.maze.move_right()

        r = self.maze.snakes[0].delta_len() - 1
        done = False
        if self.maze.done():
            done = True
            r = -5

        return self.get_state(), r, done

    def render(self):
        if self.viewer is None:
            self.viewer = MazeViewer(self.maze)
        self.viewer.render()

    def get_state(self):
        return np.hstack([
            self.maze.target[0], self.maze.target[1], self.maze.snakes[0].x,
            self.maze.snakes[0].y, self.maze.snakes[0].length()
        ])
Example #13
def viewer_run():
    viewer = MazeViewer(Maze())
    while True:
        viewer.render()
Example #14
    def _calc_block_size(self):
        w, h = self.get_size()
        area_w = w * self._get_occupy()
        area_h = h * self._get_occupy()

        block_w = area_w / self.maze.max_x
        block_h = area_h / self.maze.max_y

        return (block_w, block_h)


def viewer_run():
    viewer = MazeViewer(Maze())
    while True:
        viewer.render()


if __name__ == '__main__':
    import time
    import random
    import logging
    maze = Maze.build((10, 10))
    viewer = MazeViewer(maze)
    while True:
        dx, dy = random.randint(-1, 1), random.randint(-1, 1)
        print(dx, dy)
        maze.move(dx, dy)
        viewer.render()
        time.sleep(0.1)
Example #15
def main():
    global env
    env = Maze('./env/maps/map1.json', full_observation=True)
    env.after(100, run_maze)  # Schedule run_maze() once after the given delay in ms.
    env.mainloop()  # mainloop() to run the application.