def Mazes_generator(self, batch_size):
    """Build ``batch_size`` RL problems, each on a freshly generated maze.

    For every sample, ``maze_generator()`` is called with no arguments and
    its three return values are passed to ``MDP`` together with the discount
    taken from ``self.rl.mdp.discount``. The resulting MDP is wrapped in an
    ``RL`` object whose second argument is ``np.random.normal``
    (presumably the reward-sampling function; confirm against ``RL``).

    Args:
        batch_size: Number of maze problems to create.

    Returns:
        A list of ``batch_size`` ``RL`` instances, in creation order.
    """
    def build_sample():
        # One maze -> one MDP -> one RL wrapper.
        T, R, E = maze_generator()
        maze_mdp = MDP(T, R, E, self.rl.mdp.discount)
        return RL(maze_mdp, np.random.normal)

    return [build_sample() for _ in range(batch_size)]
# Seed NumPy's global RNG so repeated runs use the same random stream.
seed = 1234567891
np.random.seed(seed)

# Discount factor: scalar in [0,1)
discount = 0.95

# Collects the third return value of modelBasedRL for every maze
# (presumably the per-episode reward history; confirm against RL).
H = []

# Prepare the figure and axis labels for the learning curve.
ax = plt.figure(figsize=(15, 10))
plt.grid()
plt.xlabel("Episodes")
plt.ylabel("Rewards (Undiscounted)")

for i in range(100):
    # MDP object built from a freshly generated maze.
    T, R, E = maze_generator()
    mdp = MDP(T, R, E, discount)

    # RL problem
    rlProblem = RL(mdp, np.random.normal)

    # Initial model: uniform transition probabilities and all-zero rewards.
    uniform_transitions = (
        np.ones([mdp.nActions, mdp.nStates, mdp.nStates]) / mdp.nStates
    )
    zero_rewards = np.zeros([mdp.nActions, mdp.nStates])

    q, p, h = rlProblem.modelBasedRL(
        0,
        defaultT=uniform_transitions,
        initialR=zero_rewards,
        nEpisodes=1500,
        nSteps=100,
        epsilon=0.1,
    )
    H.append(h)

    # Progress report: maze index and the mean of the last 100 entries of h.
    print(i, np.mean(h[-100:]))

# Average the collected histories element-wise across the 100 mazes.
H = np.mean(H, 0)
def init_maze(self):
    """Generate a new maze and populate it with objects, ladders and walls.

    Steps, in order:

    1. Build the grid with ``maze_generator(SIZE_X, SIZE_Y, wall, clear)``
       using the markers from ``OBJECTS_PRESENTS``.
    2. Reset ``self.place`` and empty the ``items``, ``life_obj``,
       ``messages``, ``ladders`` and ``walls`` collections.
    3. For every entry in ``UNLIFE_OBJECTS_COUNTS``, ``LADDER_OBJECTS_COUNTS``
       and ``LIFE_OBJECTS_COUNTS``, instantiate the class of that name
       (looked up in the module globals) on a random free cell, register
       it in the matching collection and mark the cell in the grid with the
       code from ``UNLIFE_OBJECTS`` / ``LADDER_OBJECTS`` / ``LIFE_OBJECTS``.
       The position of a ``"LadderUp"`` is stored in
       ``self.init_human_position``; each life object's ``message`` is
       added to ``self.messages``.
    4. Fill ``self.place`` so that ``self.place[x][y]`` is a ``Wall`` for
       wall cells and ``None`` otherwise; walls are also added to
       ``self.walls``.

    Free cells are found by rejection sampling, so the maze must contain at
    least as many free cells as objects to place; otherwise the search
    never terminates.

    Returns:
        The maze grid (indexed ``maze[x][y]``) with occupied cells
        overwritten by the corresponding object codes.
    """
    wall_marker = OBJECTS_PRESENTS["wall"]
    free = OBJECTS_PRESENTS["clear"]
    maze = maze_generator(SIZE_X, SIZE_Y, wall_marker, free)

    def random_free_cell():
        """Return a random ``(x, y)`` whose maze cell is still free."""
        while True:
            x = random.randint(0, SIZE_X - 1)
            y = random.randint(0, SIZE_Y - 1)
            if maze[x][y] == free:
                return x, y

    self.place = []
    self.items.empty()
    self.life_obj.empty()
    self.messages.empty()
    self.ladders.empty()
    self.walls.empty()

    # ``items()`` instead of the Python-2-only ``iteritems()`` so the method
    # runs on both Python 2 and Python 3.
    for object_name, count in UNLIFE_OBJECTS_COUNTS.items():
        for _ in range(count):
            x, y = random_free_cell()
            # Classes are resolved by name from the module namespace.
            sprite = globals().get(object_name)(x, y)
            self.items.add(sprite)
            maze[x][y] = UNLIFE_OBJECTS[object_name]

    for object_name, count in LADDER_OBJECTS_COUNTS.items():
        for _ in range(count):
            x, y = random_free_cell()
            sprite = globals().get(object_name)(x, y)
            self.ladders.add(sprite)
            if object_name == "LadderUp":
                # Remember where the "LadderUp" was placed.
                self.init_human_position = (x, y,)
            maze[x][y] = LADDER_OBJECTS[object_name]

    for object_name, count in LIFE_OBJECTS_COUNTS.items():
        for _ in range(count):
            x, y = random_free_cell()
            sprite = globals().get(object_name)(x, y)
            self.life_obj.add(sprite)
            self.messages.add(sprite.message)
            maze[x][y] = LIFE_OBJECTS[object_name]

    # Mirror the grid into self.place: Wall objects for walls, None elsewhere.
    for x in range(SIZE_X):
        self.place.append([])
        for y in range(SIZE_Y):
            if maze[x][y] == wall_marker:
                wall = Wall(x, y)
                self.place[-1].append(wall)
                self.walls.add(wall)
            else:
                self.place[-1].append(None)
    return maze