示例#1
0
    def _transition_func(self, state, action):
        """Advance one step and tag the resulting cell with its q value.

        The positional move is delegated to the parent class. The q value is
        obtained from ``self._transition_q`` evaluated at the new cell with
        the applied action; any non-zero q marks the returned state terminal.

        Args:
            state: current state, forwarded unchanged to the parent transition.
            action: action being applied.

        Returns:
            A ``RoomCubeState`` built from the new x, y, z and the new q.
        """
        moved = super()._transition_func(state, action)
        cell = (moved.x, moved.y, moved.z)
        q_after = self._transition_q(cell, action)

        successor = RoomCubeState(*cell, q_after)
        if q_after != 0:
            successor.set_terminal(True)
        return successor
示例#2
0
    def _transition_func(self, state, action):
        """Advance one step in the product of the cube world and the automaton.

        The positional move is delegated to the parent class. The atomic
        propositions are evaluated at the new cell via ``self._evaluate_APs``
        and fed, together with the current ``state.q``, to
        ``self.automata.transition_func`` to obtain the next automaton state.
        The result is marked terminal when
        ``self.automata.aut_spot.state_is_accepting`` reports that state as
        accepting.

        Args:
            state: current state; its ``q`` attribute is read.
            action: action being applied.

        Returns:
            A ``RoomCubeState`` built from the new x, y, z and the new q.
        """
        moved = super()._transition_func(state, action)
        cell = (moved.x, moved.y, moved.z)

        ap_values = self._evaluate_APs(cell, action)
        q_after = self.automata.transition_func(state.q, ap_values)
        successor = RoomCubeState(*cell, q_after)

        accepting = self.automata.aut_spot.state_is_accepting(q_after)
        if accepting:
            successor.set_terminal(True)
        return successor
示例#3
0
    def _transition_func(self, state, action):
        """Advance one step; the q component of the result is always 0.

        The positional move is delegated to the parent class. The returned
        state is marked terminal when ``self._evaluate_qstate`` returns 1 for
        the new cell and the applied action.

        Args:
            state: current state, forwarded unchanged to the parent transition.
            action: action being applied.

        Returns:
            A ``RoomCubeState`` built from the new x, y, z with q fixed to 0.
        """
        moved = super()._transition_func(state, action)
        cell = (moved.x, moved.y, moved.z)

        successor = RoomCubeState(*cell, 0)
        if self._evaluate_qstate(cell, action) == 1:
            successor.set_terminal(True)
        return successor
示例#4
0
    def _transition_func(self, state, action):
        """Advance one step under the automaton and room constraints.

        A terminal ``state`` is returned unchanged. Otherwise the positional
        move is delegated to the parent class and the automaton state is
        advanced using the atomic propositions evaluated at the new cell.
        An automaton state that is in neither ``constraints['Qg']`` nor
        ``constraints['Qs']`` is replaced by -1.

        Termination rules, selected by ``constraints['mode']``:
            * 'root' and 'child': q is -1 or q is in ``constraints['Qg']``.
            * 'child' only: additionally, the new room is in
              ``constraints['Sg']`` or is not in ``constraints['Ss']``.

        Args:
            state: current state; ``is_terminal()`` and ``q`` are read.
            action: action being applied.

        Returns:
            The input ``state`` if it is already terminal, otherwise a new
            ``RoomCubeState``.
        """
        if state.is_terminal():
            return state

        moved = super()._transition_func(state, action)
        ap_values = self._evaluate_APs((moved.x, moved.y, moved.z), action)
        q_after = self.automata.transition_func(state.q, ap_values)

        # Anything outside both the goal set and the stay set collapses to
        # the sentinel -1.
        if not (q_after in self.constraints['Qg']
                or q_after in self.constraints['Qs']):
            q_after = -1

        successor = RoomCubeState(moved.x, moved.y, moved.z, q_after)
        room = self.loc_to_room[(successor.x, successor.y, successor.z)]

        mode = self.constraints['mode']

        # Both modes share the automaton-based termination test.
        if mode in ('root', 'child'):
            if successor.q == -1 or successor.q in self.constraints['Qg']:
                successor.set_terminal(True)

        # Only the child mode is additionally bounded by rooms.
        if mode == 'child':
            if (room in self.constraints['Sg']
                    or room not in self.constraints['Ss']):
                successor.set_terminal(True)

        return successor
示例#5
0
    def _solve_subproblem_L2(self,
                             init_locs=(1, 1, 1),
                             constraints=None,
                             ap_maps=None):
        """Solve one sub-problem with a three-level (L0/L1/L2) AMDP hierarchy.

        Builds the L0, L1 and L2 domains from ``self.cube_env``, wires up the
        matching policy generators and grounded actions, runs an
        ``AMDPAgent`` and then rolls out the level-0 policy from the start
        cell to obtain a plan. The plan is always printed to stdout.

        Args:
            init_locs: (x, y, z) start cell.
            constraints: constraint dict forwarded to every domain, policy
                generator and the root action. ``None`` means a fresh empty
                dict (avoids sharing one default dict across calls, since
                the receiving objects may keep a reference to it).
            ap_maps: atomic-proposition map forwarded alongside
                ``constraints``. ``None`` means a fresh empty dict.

        Returns:
            tuple: ``(action_seq, state_seq)`` where ``state_seq`` starts
            with the initial state and has one more element than
            ``action_seq``.
        """
        if constraints is None:
            constraints = {}
        if ap_maps is None:
            ap_maps = {}

        # Level-0 (grid cell) domain.
        l0Domain = RoomCubeMDP(init_loc=init_locs,
                               env_file=[self.cube_env],
                               constraints=constraints,
                               ap_maps=ap_maps,
                               slip_prob=self.slip_prob)

        # Level-1 (room) and level-2 (floor) domains start where the agent is.
        start_room = l0Domain.get_room_numbers(init_locs)[0]
        start_floor = l0Domain.get_floor_numbers(init_locs)[0]

        l1Domain = CubeL1MDP(start_room,
                             env_file=[self.cube_env],
                             constraints=constraints,
                             ap_maps=ap_maps)
        l2Domain = CubeL2MDP(start_floor,
                             env_file=[self.cube_env],
                             constraints=constraints,
                             ap_maps=ap_maps)

        # One policy generator per level, ordered from lowest to highest.
        policy_generators = [
            CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env]),
            CubeL1PolicyGenerator(l0Domain,
                                  AbstractCubeL1StateMapper(l0Domain),
                                  env_file=[self.cube_env],
                                  constraints=constraints,
                                  ap_maps=ap_maps),
            CubeL2PolicyGenerator(l1Domain,
                                  AbstractCubeL2StateMapper(l1Domain),
                                  env_file=[self.cube_env],
                                  constraints=constraints,
                                  ap_maps=ap_maps),
        ]

        # Task hierarchy: primitive actions -> L1 actions -> L2 actions -> root.
        l1Subtasks = [
            PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS
        ]
        a2rt = [
            CubeL1GroundedAction(a, l1Subtasks, l0Domain)
            for a in l1Domain.ACTIONS
        ]
        a2rt2 = [
            CubeL2GroundedAction(a, a2rt, l1Domain) for a in l2Domain.ACTIONS
        ]

        l2Root = CubeRootL2GroundedAction(l2Domain.action_for_floor_number(1),
                                          a2rt2,
                                          l2Domain,
                                          l2Domain.terminal_func,
                                          l2Domain.reward_func,
                                          constraints=constraints,
                                          ap_maps=ap_maps)

        agent = AMDPAgent(l2Root, policy_generators, l0Domain)
        agent.solve()

        # Roll out the level-0 policy until a state without a policy entry
        # is reached.
        l0_policy = agent.policy_stack[0]
        state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
        action_seq = []
        state_seq = [state]
        while state in l0_policy:
            action = l0_policy[state]
            state = l0Domain._transition_func(state, action)

            action_seq.append(action)
            state_seq.append(state)

        print("Plan")
        # state_seq is one longer than action_seq, so zip pairs every action
        # with the state it was taken in; the final state is printed alone.
        for visited, taken in zip(state_seq, action_seq):
            print("\t", visited, taken)
        print("\t", state_seq[-1])
        return action_seq, state_seq
示例#6
0
    def __init__(self,
                 len_x=9,
                 len_y=9,
                 len_z=5,
                 init_loc=(1, 1, 1),
                 goal_locs=None,
                 env_file=None,
                 gamma=0.99,
                 slip_prob=0.00,
                 name="cube_room",
                 is_goal_terminal=True,
                 rand_init=False,
                 step_cost=0.0,
                 constraints=None,
                 ap_maps=None):
        '''
        Build the cube-room MDP from an environment description.

        Args:
            len_x, len_y, len_z (int): grid dimensions. They are overridden
                by the values stored in the environment dict.
            init_loc (tuple: (int, int, int)): start cell.
            goal_locs (list of tuples: [(int, int, int), ...]): goal cells;
                defaults to [(9, 9, 3)].
            env_file (list): list whose first element is the environment
                dict. The keys read here are 'len_x', 'len_y', 'len_z',
                'walls', 'num_room', 'num_floor', 'room_to_locs',
                'floor_to_rooms', 'floor_to_locs' and 'room_to_floor'.
            gamma, slip_prob, name, is_goal_terminal, rand_init, step_cost:
                forwarded to CubeMDP.__init__ (gamma also to MDP.__init__).
            constraints (dict): logic formula of 'goal' and 'stay' for the
                reward function - goal (large positive), stay (zero),
                otherwise (large negative). Defaults to
                {'goal': [], 'stay': []}. If it contains the key 'lowest',
                the constraints are replaced by {'goal': 'a', 'stay': 'b'}
                and 'b' is bound to the room of init_loc.
            ap_maps (dict): {ap_symbol: (category, state), ...},
                e.g. {a: ('r', [1]), b: ('a', west)}.
                category: floor (f), room (r), lowest level action (a),
                grid cells (c). Defaults to {}.

        Raises:
            ValueError: if env_file is missing or empty.
        '''
        # Fresh objects per instance: default containers must not be shared
        # between instances because self.constraints / self.ap_maps keep a
        # reference to them.
        if goal_locs is None:
            goal_locs = [(9, 9, 3)]
        if constraints is None:
            constraints = {'goal': [], 'stay': []}
        if ap_maps is None:
            ap_maps = {}

        # Load environment file. Without it neither the walls nor the
        # room/floor tables exist, so construction cannot continue.
        if env_file is None or len(env_file) == 0:
            raise ValueError(
                'Fail to initialize RoomCubeMDP: env_file must contain an '
                'environment description')

        cube_env = env_file[0]
        len_x = cube_env['len_x']
        len_y = cube_env['len_y']
        len_z = cube_env['len_z']
        walls = cube_env['walls']
        self.num_room = cube_env['num_room']
        self.num_floor = cube_env['num_floor']
        self.room_to_locs = cube_env['room_to_locs']
        self.floor_to_rooms = cube_env['floor_to_rooms']
        self.floor_to_locs = cube_env['floor_to_locs']
        self.room_to_floor = cube_env['room_to_floor']

        CubeMDP.__init__(self,
                         len_x,
                         len_y,
                         len_z,
                         init_loc,
                         goal_locs=goal_locs,
                         walls=walls,
                         gamma=gamma,
                         slip_prob=slip_prob,
                         name=name,
                         is_goal_terminal=is_goal_terminal,
                         rand_init=rand_init,
                         step_cost=step_cost)

        if 'lowest' in constraints:
            self.constraints = {'goal': 'a', 'stay': 'b'}
            self.ap_maps = {
                'a': ap_maps['a'],
                'b': [1, 'state',
                      self.get_room_numbers(init_loc)[0]]
            }  # AP --> real world
        else:
            self.constraints = constraints  # constraints for LTL
            self.ap_maps = ap_maps

        # The initial q is evaluated at the start cell with an empty action;
        # a non-zero q means the start state is already terminal.
        init_state = RoomCubeState(init_loc[0], init_loc[1], init_loc[2],
                                   self._transition_q(init_loc, ""))
        if init_state.q != 0:
            init_state.set_terminal(True)

        MDP.__init__(self,
                     RoomCubeMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)
示例#7
0
    def _solve_subproblem_L1(self,
                             init_locs=(1, 1, 1),
                             constraints=None,
                             ap_maps=None,
                             verbose=False):
        """Solve one sub-problem with a two-level (L0/L1) AMDP hierarchy.

        If the initial state of the level-0 domain already has ``q == 1`` no
        planning is done and an empty plan is returned. Otherwise the L1
        domain, policy generators and grounded actions are built, an
        ``AMDPAgent`` is solved and the level-0 policy is rolled out from the
        start cell.

        Args:
            init_locs: (x, y, z) start cell.
            constraints: constraint dict forwarded to the domains, the L1
                policy generator and the root action. ``None`` means a fresh
                empty dict (avoids sharing one default dict across calls,
                since the receiving objects may keep a reference to it).
            ap_maps: atomic-proposition map forwarded alongside
                ``constraints``. ``None`` means a fresh empty dict.
            verbose: when true, print the resulting plan to stdout.

        Returns:
            tuple: ``(action_seq, state_seq, backup_num)``. ``state_seq`` has
            one more element than ``action_seq``; ``backup_num`` is
            ``agent.backup_num`` after solving, or 0 when no planning was
            needed.
        """
        if constraints is None:
            constraints = {}
        if ap_maps is None:
            ap_maps = {}

        # Level-0 (grid cell) domain.
        l0Domain = RoomCubeMDP(init_loc=init_locs,
                               env_file=[self.cube_env],
                               constraints=constraints,
                               ap_maps=ap_maps,
                               slip_prob=self.slip_prob)
        backup_num = 0
        # If the current state satisfies the constraint already, we don't
        # have to solve it.
        if l0Domain.init_state.q == 1:
            action_seq = []
            state_seq = [l0Domain.init_state]
        else:
            # Level-1 (room) domain starts in the room of the start cell.
            start_room = l0Domain.get_room_numbers(init_locs)[0]
            l1Domain = CubeL1MDP(start_room,
                                 env_file=[self.cube_env],
                                 constraints=constraints,
                                 ap_maps=ap_maps,
                                 slip_prob=self.slip_prob)

            # One policy generator per level, lowest level first.
            policy_generators = [
                CubeL0PolicyGenerator(l0Domain, env_file=[self.cube_env]),
                CubeL1PolicyGenerator(l0Domain,
                                      AbstractCubeL1StateMapper(l0Domain),
                                      env_file=[self.cube_env],
                                      constraints=constraints,
                                      ap_maps=ap_maps),
            ]

            # Task hierarchy: primitive actions -> L1 actions -> root.
            l1Subtasks = [
                PrimitiveAbstractTask(action) for action in l0Domain.ACTIONS
            ]
            a2rt = [
                CubeL1GroundedAction(a, l1Subtasks, l0Domain)
                for a in l1Domain.ACTIONS
            ]
            l1Root = CubeRootL1GroundedAction(
                l1Domain.action_for_room_number(0),
                a2rt,
                l1Domain,
                l1Domain.terminal_func,
                l1Domain.reward_func,
                constraints=constraints,
                ap_maps=ap_maps)

            agent = AMDPAgent(l1Root, policy_generators, l0Domain)
            agent.solve()
            backup_num = agent.backup_num

            # Roll out the level-0 policy until a state without a policy
            # entry is reached.
            l0_policy = agent.policy_stack[0]
            state = RoomCubeState(init_locs[0], init_locs[1], init_locs[2], 0)
            action_seq = []
            state_seq = [state]
            while state in l0_policy:
                action = l0_policy[state]
                state = l0Domain._transition_func(state, action)

                action_seq.append(action)
                state_seq.append(state)

        if verbose:
            print("Plan")
            # state_seq is one longer than action_seq, so zip pairs every
            # action with the state it was taken in; the final state is
            # printed alone.
            for visited, taken in zip(state_seq, action_seq):
                print("\t", visited, taken)
            print("\t", state_seq[-1])

        return action_seq, state_seq, backup_num