示例#1
0
 def Mazes_generator(self,batch_size):
     """Return a list of ``batch_size`` RL problems, one per generated maze.

     For every entry a fresh ``maze_generator()`` result is wrapped in an
     ``MDP`` that reuses the discount stored on ``self.rl.mdp``; the ``MDP``
     is then handed to ``RL`` together with ``np.random.normal``.  A
     non-positive ``batch_size`` yields an empty list.
     """
     def build_sample():
         # maze_generator() has to produce exactly three components.
         maze_t, maze_r, maze_e = maze_generator()
         sample_mdp = MDP(maze_t, maze_r, maze_e, self.rl.mdp.discount)
         return RL(sample_mdp, np.random.normal)

     return [build_sample() for _ in range(batch_size)]
# Seed NumPy's global RNG before any maze is generated.
seed = 1234567891
np.random.seed(seed)

# Discount factor: scalar in [0,1)
discount = 0.95

# Collects the third value returned by modelBasedRL, one entry per trial.
H = []

# Figure set-up.  NOTE(review): `ax` actually holds what plt.figure() returns.
ax = plt.figure(figsize=(15, 10))
plt.grid()
plt.xlabel("Episodes")
plt.ylabel("Rewards (Undiscounted)")

for i in range(100):
    # MDP object, built from a newly generated maze
    T, R, E = maze_generator()
    mdp = MDP(T, R, E, discount)

    # RL problem
    rlProblem = RL(mdp, np.random.normal)

    # Initial model: every transition entry is 1/nStates, every reward is 0.
    uniform_T = np.ones([mdp.nActions, mdp.nStates, mdp.nStates]) / mdp.nStates
    zero_R = np.zeros([mdp.nActions, mdp.nStates])

    # First positional argument is 0 -- presumably the start state; TODO confirm.
    q, p, h = rlProblem.modelBasedRL(
        0,
        defaultT=uniform_T,
        initialR=zero_R,
        nEpisodes=1500,
        nSteps=100,
        epsilon=0.1)

    H.append(h)
    # Progress report: trial index and the mean of the last 100 entries of h.
    print(i, np.mean(h[-100:]))

# Element-wise average over the 100 trials.
H = np.mean(H, axis=0)
示例#3
0
文件: level.py 项目: maulerant/pycats
    def init_maze(self):
        """Generate a new maze and populate it with items, ladders and life objects.

        A grid is obtained from ``maze_generator`` and the ``items``,
        ``life_obj``, ``messages``, ``ladders`` and ``walls`` groups are
        emptied.  Every object named in ``UNLIFE_OBJECTS_COUNTS``,
        ``LADDER_OBJECTS_COUNTS`` and ``LIFE_OBJECTS_COUNTS`` is then created
        on a randomly chosen free cell, and that cell is overwritten with the
        object's code so it is no longer free.  Finally ``self.place`` is
        rebuilt as a ``SIZE_X`` by ``SIZE_Y`` grid holding a ``Wall`` for every
        wall cell and ``None`` everywhere else.

        Side effects: replaces ``self.place`` and, when a ``"LadderUp"`` is
        placed, sets ``self.init_human_position`` to its ``(x, y)`` cell.

        Returns:
            The maze grid, including the codes written for placed objects.
        """
        wall_code = OBJECTS_PRESENTS["wall"]
        free = OBJECTS_PRESENTS["clear"]
        maze = maze_generator(SIZE_X, SIZE_Y, wall_code, free)
        self.place = []

        self.items.empty()
        self.life_obj.empty()
        self.messages.empty()
        self.ladders.empty()
        self.walls.empty()

        def random_free_cell():
            """Return the coordinates of a randomly drawn cell that is still free."""
            # NOTE: this never terminates if the maze has no free cell left, so
            # the configured object counts must stay below the free-cell count.
            while True:
                x = random.randint(0, SIZE_X - 1)
                y = random.randint(0, SIZE_Y - 1)
                if maze[x][y] == free:
                    return x, y

        def spawn(object_name, codes):
            """Create ``object_name`` on a free cell and mark that cell as taken."""
            x, y = random_free_cell()
            # The sprite class is looked up by name among the module globals.
            sprite = globals().get(object_name)(x, y)
            maze[x][y] = codes[object_name]
            return sprite, x, y

        # ``items()`` instead of ``iteritems()``: works on Python 2 and 3 alike.
        for object_name, count in UNLIFE_OBJECTS_COUNTS.items():
            for _ in range(count):
                sprite, x, y = spawn(object_name, UNLIFE_OBJECTS)
                self.items.add(sprite)

        for object_name, count in LADDER_OBJECTS_COUNTS.items():
            for _ in range(count):
                sprite, x, y = spawn(object_name, LADDER_OBJECTS)
                self.ladders.add(sprite)
                if object_name == "LadderUp":
                    # Remember where the up-ladder landed as the human's start cell.
                    self.init_human_position = (x, y,)

        for object_name, count in LIFE_OBJECTS_COUNTS.items():
            for _ in range(count):
                sprite, x, y = spawn(object_name, LIFE_OBJECTS)
                self.life_obj.add(sprite)
                self.messages.add(sprite.message)

        # Mirror the grid into self.place: Wall sprites on wall cells, None elsewhere.
        for x in range(SIZE_X):
            row = []
            for y in range(SIZE_Y):
                if maze[x][y] == wall_code:
                    wall = Wall(x, y)
                    row.append(wall)
                    self.walls.add(wall)
                else:
                    row.append(None)
            self.place.append(row)

        return maze