def _transition_func(self, state, action):
    """Advance the spatial (x, y, z) state via the base MDP, then recompute q.

    The automaton component q is derived from the successor location via
    ``_transition_q``; any nonzero q marks the successor state terminal.
    """
    xyz_next = super()._transition_func(state, action)
    loc = (xyz_next.x, xyz_next.y, xyz_next.z)
    q_next = self._transition_q(loc, action)

    successor = RoomCubeState(xyz_next.x, xyz_next.y, xyz_next.z, q_next)
    if q_next != 0:
        successor.set_terminal(True)
    return successor
def _transition_func(self, state, action):
    """Step the spatial MDP, evaluate atomic propositions at the successor
    location, and advance the automaton state accordingly.

    The successor is terminal when the automaton lands in an accepting state.
    """
    xyz_next = super()._transition_func(state, action)
    loc = (xyz_next.x, xyz_next.y, xyz_next.z)

    ap_values = self._evaluate_APs(loc, action)
    q_next = self.automata.transition_func(state.q, ap_values)

    successor = RoomCubeState(xyz_next.x, xyz_next.y, xyz_next.z, q_next)
    if self.automata.aut_spot.state_is_accepting(q_next):
        successor.set_terminal(True)
    return successor
def _transition_func(self, state, action):
    """Step the spatial MDP with a fixed automaton component (q = 0).

    Terminality of the successor is decided by ``_evaluate_qstate`` at the
    new location rather than by the q value itself.
    """
    xyz_next = super()._transition_func(state, action)
    loc = (xyz_next.x, xyz_next.y, xyz_next.z)

    successor = RoomCubeState(xyz_next.x, xyz_next.y, xyz_next.z, 0)
    if self._evaluate_qstate(loc, action) == 1:
        successor.set_terminal(True)
    return successor
def _transition_func(self, state, action):
    """Joint (location, automaton) transition under subtask constraints.

    Absorbing states return themselves unchanged. The automaton state is
    driven to the trap value -1 whenever it exits both the goal set Qg and
    the safe set Qs. Terminal conditions then depend on whether this MDP is
    solving a 'root' or a 'child' subtask; a child subtask additionally
    terminates on entering a goal room (Sg) or leaving the safe rooms (Ss).
    """
    if state.is_terminal():
        return state

    xyz_next = super()._transition_func(state, action)
    loc = (xyz_next.x, xyz_next.y, xyz_next.z)
    ap_values = self._evaluate_APs(loc, action)
    q_next = self.automata.transition_func(state.q, ap_values)

    # Outside both Qg and Qs: trap the automaton (treated as terminal below).
    if (q_next not in self.constraints['Qg']
            and q_next not in self.constraints['Qs']):
        q_next = -1

    successor = RoomCubeState(xyz_next.x, xyz_next.y, xyz_next.z, q_next)
    room_next = self.loc_to_room[loc]
    mode = self.constraints['mode']

    if mode == 'root':
        if successor.q in self.constraints['Qg'] or successor.q == -1:
            successor.set_terminal(True)

    if mode == 'child':
        if successor.q == -1 or successor.q in self.constraints['Qg']:
            successor.set_terminal(True)
        if room_next in self.constraints['Sg']:
            successor.set_terminal(True)
        elif room_next not in self.constraints['Ss']:
            successor.set_terminal(True)

    return successor
def _solve_subproblem_L2(self, init_locs=(1, 1, 1), constraints=None,
                         ap_maps=None):
    """Solve a level-2 subproblem with a three-level AMDP hierarchy.

    Builds the grounded level-0 room-cube domain plus level-1 (rooms) and
    level-2 (floors) abstract domains, wires up one policy generator and
    one grounded-action layer per level, solves with ``AMDPAgent``, and
    rolls the resulting level-0 policy out into a concrete plan.

    Args:
        init_locs (tuple): starting (x, y, z) location.
        constraints (dict | None): LTL constraint spec passed through to
            every domain; defaults to an empty dict.
        ap_maps (dict | None): atomic-proposition map passed through to
            every domain; defaults to an empty dict.

    Returns:
        (action_seq, state_seq): low-level actions and the visited states
        (state_seq has one more element than action_seq).
    """
    # Fresh dicts per call — mutable default arguments are shared across
    # calls and were a latent bug in the original signature.
    constraints = {} if constraints is None else constraints
    ap_maps = {} if ap_maps is None else ap_maps

    # Grounded level-0 domain.
    l0Domain = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                           constraints=constraints, ap_maps=ap_maps,
                           slip_prob=self.slip_prob)

    # Abstract level-1 (room) and level-2 (floor) domains.
    start_room = l0Domain.get_room_numbers(init_locs)[0]
    start_floor = l0Domain.get_floor_numbers(init_locs)[0]
    l1Domain = CubeL1MDP(start_room, env_file=[self.cube_env],
                         constraints=constraints, ap_maps=ap_maps)
    l2Domain = CubeL2MDP(start_floor, env_file=[self.cube_env],
                         constraints=constraints, ap_maps=ap_maps)

    # One policy generator per level, ordered bottom-up.
    policy_generators = [
        CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env]),
        CubeL1PolicyGenerator(l0Domain,
                              AbstractCubeL1StateMapper(l0Domain),
                              env_file=[self.cube_env],
                              constraints=constraints, ap_maps=ap_maps),
        CubeL2PolicyGenerator(l1Domain,
                              AbstractCubeL2StateMapper(l1Domain),
                              env_file=[self.cube_env],
                              constraints=constraints, ap_maps=ap_maps),
    ]

    # Grounded-action hierarchy: primitives -> L1 actions -> L2 actions.
    l1Subtasks = [PrimitiveAbstractTask(action)
                  for action in l0Domain.ACTIONS]
    a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain)
            for a in l1Domain.ACTIONS]
    a2rt2 = [CubeL2GroundedAction(a, a2rt, l1Domain)
             for a in l2Domain.ACTIONS]

    l2Root = CubeRootL2GroundedAction(
        l2Domain.action_for_floor_number(1), a2rt2, l2Domain,
        l2Domain.terminal_func, l2Domain.reward_func,
        constraints=constraints, ap_maps=ap_maps)

    agent = AMDPAgent(l2Root, policy_generators, l0Domain)
    # NOTE(review): the original also built an `l2Subtasks` list here that
    # was never used; that dead code has been removed.
    agent.solve()

    # Roll out the solved level-0 policy into an action/state sequence.
    state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
    action_seq = []
    state_seq = [state]
    while state in agent.policy_stack[0].keys():
        action = agent.policy_stack[0][state]
        state = l0Domain._transition_func(state, action)
        action_seq.append(action)
        state_seq.append(state)

    print("Plan")
    for i in range(len(action_seq)):
        print("\t", state_seq[i], action_seq[i])
    print("\t", state_seq[-1])

    return action_seq, state_seq
def __init__(self,
             len_x=9, len_y=9, len_z=5,
             init_loc=(1, 1, 1),
             goal_locs=None,
             env_file=None,
             gamma=0.99, slip_prob=0.00, name="cube_room",
             is_goal_terminal=True, rand_init=False, step_cost=0.0,
             constraints=None, ap_maps=None):
    '''
    Args:
        len_x, len_y, len_z (int)
        init_loc (tuple: (int, int, int))
        goal_locs (list of tuples: [(int, int, int), ...];
            defaults to [(9, 9, 3)])
        env_file (list): one-element list holding the environment dict
            (specifies dimensions, walls, rooms, floors); required.
        constraints: logic formula of 'goal' and 'stay' for the reward
            function - goal (large positive), stay (zero),
            otherwise (large negative); defaults to {'goal': [], 'stay': []}
        ap_maps: dictionary {ap_symbol: (category, state), ...}
            ex) {a: ('r', [1]), b: ('a', west)}
            category: floor(f), room(r), lowest level action(a),
            grid cells (c)

    Raises:
        ValueError: if no environment file is supplied.
    '''
    # Mutable containers are created per call; mutable default arguments
    # are shared across calls and were a latent bug in the original.
    goal_locs = [(9, 9, 3)] if goal_locs is None else goal_locs
    env_file = [] if env_file is None else env_file
    constraints = ({'goal': [], 'stay': []} if constraints is None
                   else constraints)
    ap_maps = {} if ap_maps is None else ap_maps

    # Load environment file. The original printed a message on a missing
    # file and then crashed later with an UnboundLocalError on `walls`;
    # fail fast and explicitly instead.
    if len(env_file) == 0:
        raise ValueError('Fail to initialize RoomCubeMDP: '
                         'no environment file provided')

    cube_env = env_file[0]
    len_x = cube_env['len_x']
    len_y = cube_env['len_y']
    len_z = cube_env['len_z']
    walls = cube_env['walls']
    self.num_room = cube_env['num_room']
    self.num_floor = cube_env['num_floor']
    self.room_to_locs = cube_env['room_to_locs']
    self.floor_to_rooms = cube_env['floor_to_rooms']
    self.floor_to_locs = cube_env['floor_to_locs']
    self.room_to_floor = cube_env['room_to_floor']

    CubeMDP.__init__(self, len_x, len_y, len_z, init_loc,
                     goal_locs=goal_locs, walls=walls, gamma=gamma,
                     slip_prob=slip_prob, name=name,
                     is_goal_terminal=is_goal_terminal,
                     rand_init=rand_init, step_cost=step_cost)

    if 'lowest' in constraints.keys():
        # 'lowest' mode: the goal is AP 'a'; 'stay' ('b') pins the agent
        # to its starting room.
        self.constraints = {'goal': 'a', 'stay': 'b'}
        self.ap_maps = {
            'a': ap_maps['a'],
            'b': [1, 'state', self.get_room_numbers(init_loc)[0]]
        }  # AP --> real world
    else:
        self.constraints = constraints  # constraints for LTL
        self.ap_maps = ap_maps

    init_state = RoomCubeState(init_loc[0], init_loc[1], init_loc[2],
                               self._transition_q(init_loc, ""))
    # A nonzero initial q means the task is already decided at t = 0.
    if init_state.q != 0:
        init_state.set_terminal(True)

    MDP.__init__(self, RoomCubeMDP.ACTIONS, self._transition_func,
                 self._reward_func, init_state=init_state, gamma=gamma)
def _solve_subproblem_L1(self, init_locs=(1, 1, 1), constraints=None,
                         ap_maps=None, verbose=False):
    """Solve a level-1 subproblem with a two-level AMDP hierarchy.

    Builds the grounded level-0 room-cube domain and the level-1 (room)
    abstract domain, solves with ``AMDPAgent``, and rolls the level-0
    policy out into a concrete plan. If the initial state already
    satisfies the constraint (q == 1), no planning is performed.

    Args:
        init_locs (tuple): starting (x, y, z) location.
        constraints (dict | None): LTL constraint spec passed through to
            every domain; defaults to an empty dict.
        ap_maps (dict | None): atomic-proposition map passed through to
            every domain; defaults to an empty dict.
        verbose (bool): if True, print the extracted plan.

    Returns:
        (action_seq, state_seq, backup_num): the low-level plan, the
        visited states, and the number of backups the agent performed
        (0 when no planning was needed).
    """
    # Fresh dicts per call — mutable default arguments are shared across
    # calls and were a latent bug in the original signature.
    constraints = {} if constraints is None else constraints
    ap_maps = {} if ap_maps is None else ap_maps

    # Grounded level-0 domain.
    l0Domain = RoomCubeMDP(init_loc=init_locs, env_file=[self.cube_env],
                           constraints=constraints, ap_maps=ap_maps,
                           slip_prob=self.slip_prob)
    backup_num = 0

    # If the current state satisfies the constraint already, we don't
    # have to solve it.
    if l0Domain.init_state.q == 1:
        action_seq = []
        state_seq = [l0Domain.init_state]
    else:
        # Abstract level-1 (room) domain.
        start_room = l0Domain.get_room_numbers(init_locs)[0]
        l1Domain = CubeL1MDP(start_room, env_file=[self.cube_env],
                             constraints=constraints, ap_maps=ap_maps,
                             slip_prob=self.slip_prob)

        # One policy generator per level, ordered bottom-up.
        policy_generators = [
            CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env]),
            CubeL1PolicyGenerator(l0Domain,
                                  AbstractCubeL1StateMapper(l0Domain),
                                  env_file=[self.cube_env],
                                  constraints=constraints,
                                  ap_maps=ap_maps),
        ]

        # Grounded-action hierarchy: primitives -> L1 actions.
        l1Subtasks = [PrimitiveAbstractTask(action)
                      for action in l0Domain.ACTIONS]
        a2rt = [CubeL1GroundedAction(a, l1Subtasks, l0Domain)
                for a in l1Domain.ACTIONS]
        l1Root = CubeRootL1GroundedAction(
            l1Domain.action_for_room_number(0), a2rt, l1Domain,
            l1Domain.terminal_func, l1Domain.reward_func,
            constraints=constraints, ap_maps=ap_maps)

        agent = AMDPAgent(l1Root, policy_generators, l0Domain)
        agent.solve()
        backup_num = agent.backup_num

        # Roll out the solved level-0 policy into an action/state sequence.
        state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
        action_seq = []
        state_seq = [state]
        while state in agent.policy_stack[0].keys():
            action = agent.policy_stack[0][state]
            state = l0Domain._transition_func(state, action)
            action_seq.append(action)
            state_seq.append(state)

    if verbose:
        print("Plan")
        for i in range(len(action_seq)):
            print("\t", state_seq[i], action_seq[i])
        print("\t", state_seq[-1])

    return action_seq, state_seq, backup_num