예제 #1
0
 def set_room(self, room):
     """Make ``room`` the active room and reset all per-room bookkeeping.

     ``room`` is an index into ``self.rooms`` and must be in range.  The
     stored layout is copied into ``self.states`` so later edits are made
     on the copy, after which the door, start, lock and key cells are
     looked up with ``filter_states`` (presumably a list of the cells
     matching the given field value -- confirm against its definition).
     """
     assert 0 <= room < len(self.rooms)
     self.room = room

     # Work on a copy of the stored layout.
     grid = self.rooms[room].copy()
     self.states = grid
     field = self.field

     # The first DOOR cell is the goal; a room without one raises here.
     self.goal = filter_states(grid, field.DOOR)[0]
     # Every INIT cell is kept, not only the first.
     self.initPos = filter_states(grid, field.INIT)
     self.lock_pos = filter_states(grid, field.LOCK)[0]

     # A key is optional: store the first KEY cell, or None if absent.
     key_cells = filter_states(grid, field.KEY)
     if key_cells:
         self.key_pos = key_cells[0]
     else:
         self.key_pos = None

     # The agent always enters a room empty-handed.
     self.agent_holding_key = False
예제 #2
0
 def perform_action(self, state, action):
     """Apply one action and return ``(next_state, reward)``.

     ``action`` must be an array of shape ``(1,)``; its single element is
     compared against the members of ``self.actions``.  The reward is
     ``self.step_reward`` unless one of these cases overrides it:

     * N / S / E / W -- move by ``self.movemap[action]``, staying put when
       ``self.forbidden_pos`` rejects the destination.  Ending on
       ``self.goal`` either switches to the next room (``task_reward``)
       or, when the episode is finished, yields ``final_reward``.
     * G -- on a KEY cell: take the key and empty the cell
       (``task_reward``).
     * P -- on a LOCK cell while ``filter_states`` finds no KEY cell:
       release the key and empty the goal cell (``task_reward``).

     Returns ``(self.calculate_state(position), reward)``.
     """
     assert action.shape == (1,)
     chosen = action[0]  # unwrap the single action from its array
     origin = (state.x, state.y)
     target = origin
     acts = self.actions
     reward = self.step_reward

     if chosen in (acts.N, acts.S, acts.E, acts.W):
         step = self.movemap[chosen]
         # Component-wise addition of the current position and the step.
         target = tuple(sum(axis) for axis in zip(origin, step))
         if self.forbidden_pos(target):
             target = origin
         if target == self.goal:
             # Throwaway state carrying only the position, used to ask
             # whether the episode is over.
             probe = State(*[0] * 17)._replace(x=target[0], y=target[1])
             if self.episode_finished(probe):
                 reward = self.final_reward
             else:
                 # Advance to the next room and restart from the state
                 # that new_state() provides.
                 self.set_room(self.room + 1)
                 reward = self.task_reward
                 entry = self.new_state()
                 target = (entry.x, entry.y)
     elif chosen == acts.G:
         if self.states[target] == self.field.KEY:
             self.agent_holding_key = True
             self.states[target] = self.field.EMPTY
             reward = self.task_reward
     elif chosen == acts.P:
         on_lock = self.states[target] == self.field.LOCK
         # Only search the grid for a KEY cell when standing on the lock.
         if on_lock and not filter_states(self.states, self.field.KEY):
             self.agent_holding_key = False
             self.states[self.goal] = self.field.EMPTY
             reward = self.task_reward

     return (self.calculate_state(target), reward)
예제 #3
0
 def choose_action_parameterized(self, env, state, field, action):
     """Walk towards the first ``field`` cell, then perform ``action``.

     A path is cached in ``self.plan`` and discarded when ``state.r``
     differs from the remembered room.  While the agent is on the path
     but not at its end, the move leading to the next path cell is
     returned (looked up in ``env.movemap`` by the ``(dx, dy)`` offset).
     Once only the final path cell is left, ``action`` is returned.  If
     no cell of type ``field`` exists, a random member of ``env.actions``
     is returned instead.

     Every result is a one-element ``np.array``.
     """
     # Entering a different room invalidates the cached path.
     if self.room != state.r:
         self.room = state.r
         self.plan = None

     here = (state.x, state.y)
     must_replan = self.plan is None or here not in self.plan
     if must_replan:
         targets = filter_states(env.states, field)
         # Nothing to head for: fall back to a random action.  (The
         # original author was unsure this is the right fallback.)
         if not targets:
             return np.array([choice(env.actions)])

         def distance_to_target(cell):
             return manhattan_dist(cell, targets[0])

         def neighbours(cell):
             return expand_state(env.states, cell)

         self.plan = search.best_first_graph_search(
             here, targets[0], distance_to_target, neighbours)

     for idx, waypoint in enumerate(self.plan):
         reached_end = idx == len(self.plan) - 1
         if reached_end:
             # Only the last path cell remains: perform the action.
             return np.array([action])
         if here == waypoint:
             next_x, next_y = self.plan[idx + 1]
             offset = (next_x - state.x, next_y - state.y)
             return np.array([env.movemap[offset]])
     # An empty plan falls through and implicitly returns None.