class MazeEnv(object):
    """Simulation environment for the maze.

    The agent is moved around ``self.maze`` by :meth:`step`; an episode is
    over once ``self.maze.done()`` returns true.

    Class attributes:
        action_dim: number of discrete actions understood by :meth:`step`.
        state_space_dim: length of the vector returned by :meth:`get_state`.
        STEP_REWARD: reward for a move the maze reports as successful.
        BLOCKED_REWARD: reward for a move the maze rejects (e.g. walking
            into a wall) or for an unknown action code.
        DONE_REWARD: reward for the step on which the maze reports done.
    """

    action_dim = 4
    state_space_dim = 2

    # Reward shaping knobs. The values reproduce the rewards that used to be
    # hard-coded in step(); override them on a subclass or instance to add a
    # per-step cost or a wall penalty.
    STEP_REWARD = 0
    BLOCKED_REWARD = 0
    DONE_REWARD = 10

    def __init__(self, log_name='MazeEnv', maze=None):
        """Create the environment.

        Args:
            log_name: currently unused; kept so existing callers keep working.
            maze: optional maze to start from. When given, a new ``Maze`` is
                constructed from its current position, bounds, door and
                blocks. When omitted, ``Maze.build(bounds=(10, 10),
                block_cnt=20)`` is used.
        """
        if maze is None:
            self.maze = Maze.build(bounds=(10, 10), block_cnt=20)
        else:
            self.maze = Maze(
                start=(maze.x, maze.y),
                bounds=(maze.max_x, maze.max_y),
                door=maze.door,
                blocks=maze.blocks,
            )
        # The viewer is created lazily by render().
        self.viewer = None

    def reset(self):
        """Move the agent to a random start position and return the state.

        Each coordinate is drawn uniformly from ``0 .. max - 1`` (inclusive).
        No check is made here that the chosen cell is free; presumably
        ``Maze.set_start`` copes with that -- TODO confirm.
        """
        x = random.randint(0, self.maze.max_x - 1)
        y = random.randint(0, self.maze.max_y - 1)
        self.maze.set_start((x, y))
        if self.viewer is not None:
            self.viewer.maze = self.maze
        return self.get_state()

    def step(self, a):
        """Apply action ``a`` and return ``(state, reward, done)``.

        Action codes, as implemented:
            0 -> ``maze.move_right()``
            1 -> ``maze.move_left()``
            2 -> ``maze.move_down()``
            3 -> ``maze.move_up()``
        Any other value moves nothing and is rewarded like a rejected move.

        NOTE(review): earlier documentation described the mapping as
        0=up, 1=down, 2=left, 3=right, which does not match the dispatch
        below. The code's behaviour is kept; confirm which one is intended.

        Returns:
            A tuple ``(state, reward, done)`` where ``state`` comes from
            :meth:`get_state`, ``reward`` is one of the ``*_REWARD`` class
            attributes and ``done`` is a bool.
        """
        if a == 3:
            succ = self.maze.move_up()
        elif a == 2:
            succ = self.maze.move_down()
        elif a == 1:
            succ = self.maze.move_left()
        elif a == 0:
            succ = self.maze.move_right()
        else:
            succ = False

        r = self.STEP_REWARD if succ else self.BLOCKED_REWARD

        done = False
        if self.maze.done():
            done = True
            r = self.DONE_REWARD
        return self.get_state(), r, done

    def render(self):
        """Draw the maze, creating the ``MazeViewer`` on first use."""
        if self.viewer is None:
            self.viewer = MazeViewer(self.maze)
        self.viewer.render()

    def get_state(self):
        """Return the agent position as a 1-D array ``[x, y]``."""
        return np.hstack([self.maze.x, self.maze.y])
class MazeEnv(object):
    """Simulation environment for the maze (snake-and-target variant).

    ``self.maze`` exposes a ``target`` and a list of ``snakes``; the agent
    steers ``snakes[0]`` through :meth:`step`. An episode is over once
    ``self.maze.done()`` returns true.

    Class attributes:
        action_dim: number of discrete actions understood by :meth:`step`.
        state_space_dim: length of the vector returned by :meth:`get_state`.
        STEP_REWARD: constant added to the snake's length change on every
            step (negative, so idle steps cost reward).
        DONE_REWARD: reward for the step on which the maze reports done.
    """

    action_dim = 4
    state_space_dim = 5

    # Reward shaping knobs; the values reproduce the rewards that used to be
    # hard-coded in step().
    STEP_REWARD = -1
    DONE_REWARD = -5

    def __init__(self, log_name='MazeEnv', maze=None):
        """Create the environment.

        Args:
            log_name: currently unused; kept so existing callers keep working.
            maze: optional maze to start from. When given, a new ``Maze`` is
                constructed with the same bounds and target. When omitted,
                ``Maze.build(bounds=(50, 50))`` is used.
        """
        if maze is None:
            self.maze = Maze.build(bounds=(50, 50))
        else:
            self.maze = Maze(
                bounds=(maze.max_x, maze.max_y),
                target=maze.target,
            )
        # The viewer is created lazily by render().
        self.viewer = None

    def reset(self):
        """Rebuild the maze with the current bounds and return the state.

        An existing viewer is pointed at the freshly built maze.
        """
        bounds = (self.maze.max_x, self.maze.max_y)
        self.maze = Maze.build(bounds=bounds)
        if self.viewer is not None:
            self.viewer.set_maze(self.maze)
        return self.get_state()

    def step(self, a):
        """Apply action ``a`` and return ``(state, reward, done)``.

        Action codes, as implemented:
            0 -> ``maze.move_right()``
            1 -> ``maze.move_left()``
            2 -> ``maze.move_down()``
            3 -> ``maze.move_up()``
        Any other value moves nothing.

        NOTE(review): earlier documentation described the mapping as
        0=up, 1=down, 2=left, 3=right, which does not match the dispatch
        below. The code's behaviour is kept; confirm which one is intended.

        Returns:
            A tuple ``(state, reward, done)``. The reward is
            ``snakes[0].delta_len() + STEP_REWARD``, replaced by
            ``DONE_REWARD`` when the maze reports done.
        """
        if a == 3:
            self.maze.move_up()
        elif a == 2:
            self.maze.move_down()
        elif a == 1:
            self.maze.move_left()
        elif a == 0:
            self.maze.move_right()

        # delta_len() is always called, even on the final step, exactly as
        # before, in case it has side effects.
        r = self.maze.snakes[0].delta_len() + self.STEP_REWARD

        done = False
        if self.maze.done():
            done = True
            r = self.DONE_REWARD
        return self.get_state(), r, done

    def render(self):
        """Draw the maze, creating the ``MazeViewer`` on first use."""
        if self.viewer is None:
            self.viewer = MazeViewer(self.maze)
        self.viewer.render()

    def get_state(self):
        """Return the observation as a 1-D array of five values.

        Layout: ``[target[0], target[1], snake.x, snake.y, snake.length()]``
        where ``snake`` is ``self.maze.snakes[0]``.
        """
        snake = self.maze.snakes[0]
        return np.hstack([
            self.maze.target[0],
            self.maze.target[1],
            snake.x,
            snake.y,
            snake.length(),
        ])