Exemplo n.º 1
0
    def _transition_func(self, state, action):
        '''
        Compute the successor state, including its q component.

        The spatial move is delegated to the parent class; the q value for
        the resulting location comes from self._transition_q.

        Args:
            state: current state, forwarded to the parent transition function
            action: action being applied

        Returns:
            RoomCubeState at the new (x, y, z) carrying the new q. It is
            flagged terminal whenever that q is non-zero.
        '''
        moved = super()._transition_func(state, action)
        loc = (moved.x, moved.y, moved.z)

        q_next = self._transition_q(loc, action)
        successor = RoomCubeState(*loc, q_next)

        # Every non-zero q marks the successor as terminal.
        if q_next != 0:
            successor.set_terminal(True)
        return successor
Exemplo n.º 2
0
    def _transition_func(self, state, action):
        '''
        Compute the successor state by moving in the cube and stepping the
        automaton.

        The spatial move is delegated to the parent class. The atomic
        propositions are evaluated at the new location (self._evaluate_APs)
        and fed, together with the current state.q, to
        self.automata.transition_func to obtain the next q.

        Args:
            state: current state; its q attribute is the automaton state
            action: action being applied

        Returns:
            RoomCubeState at the new (x, y, z) with the new q. It is flagged
            terminal when self.automata.aut_spot reports that q as accepting.
        '''
        moved = super()._transition_func(state, action)
        loc = (moved.x, moved.y, moved.z)

        ap_values = self._evaluate_APs(loc, action)
        q_next = self.automata.transition_func(state.q, ap_values)

        successor = RoomCubeState(*loc, q_next)
        if self.automata.aut_spot.state_is_accepting(q_next):
            successor.set_terminal(True)
        return successor
Exemplo n.º 3
0
    def _transition_func(self, state, action):
        '''
        Compute the successor state; q is always 0 in this variant.

        The spatial move is delegated to the parent class. Whether the
        successor is terminal is decided by self._evaluate_qstate on the new
        location and the action.

        Args:
            state: current state, forwarded to the parent transition function
            action: action being applied

        Returns:
            RoomCubeState at the new (x, y, z) with q == 0. It is flagged
            terminal when self._evaluate_qstate returns 1.
        '''
        moved = super()._transition_func(state, action)
        loc = (moved.x, moved.y, moved.z)

        # The successor is built before the terminal check, with a fixed q of 0.
        successor = RoomCubeState(*loc, 0)

        if self._evaluate_qstate(loc, action) == 1:
            successor.set_terminal(True)
        return successor
Exemplo n.º 4
0
    def _transition_func(self, state, action):
        '''
        Compute the successor state under the automaton and room constraints.

        A terminal state is returned unchanged. Otherwise the spatial move is
        delegated to the parent class, the atomic propositions are evaluated
        at the new location and the automaton is stepped from state.q. An
        automaton state that is in neither constraints['Qg'] nor
        constraints['Qs'] is replaced by -1.

        Terminal flagging depends on constraints['mode']:
            'root':  terminal when q is -1 or in constraints['Qg']
            'child': as for 'root', and additionally terminal when the new
                     room is in constraints['Sg'] or is not in
                     constraints['Ss']
        Any other mode leaves the successor non-terminal.

        Args:
            state: current state; its q attribute is the automaton state
            action: action being applied

        Returns:
            The input state if it is already terminal, otherwise a new
            RoomCubeState for the resulting location and q.
        '''
        if state.is_terminal():
            return state

        moved = super()._transition_func(state, action)
        loc = (moved.x, moved.y, moved.z)

        ap_values = self._evaluate_APs(loc, action)
        q_next = self.automata.transition_func(state.q, ap_values)

        rules = self.constraints
        # Outside both the goal set and the stay set: collapse to -1.
        if not (q_next in rules['Qg'] or q_next in rules['Qs']):
            q_next = -1

        successor = RoomCubeState(*loc, q_next)
        room = self.loc_to_room[(successor.x, successor.y, successor.z)]

        mode = rules['mode']

        # The q-based check is shared by the 'root' and 'child' modes.
        if mode in ('root', 'child'):
            if successor.q == -1 or successor.q in rules['Qg']:
                successor.set_terminal(True)

        # Only 'child' mode additionally constrains the room.
        if mode == 'child':
            if room in rules['Sg'] or room not in rules['Ss']:
                successor.set_terminal(True)

        return successor
Exemplo n.º 5
0
    def __init__(self,
                 len_x=9,
                 len_y=9,
                 len_z=5,
                 init_loc=(1, 1, 1),
                 goal_locs=None,
                 env_file=None,
                 gamma=0.99,
                 slip_prob=0.00,
                 name="cube_room",
                 is_goal_terminal=True,
                 rand_init=False,
                 step_cost=0.0,
                 constraints=None,
                 ap_maps=None):
        '''
        Build the MDP from an environment description.

        Args:
            len_x, len_y, len_z (int): grid dimensions; overwritten by the
                values found in env_file
            init_loc (tuple: (int, int, int))
            goal_locs (list of tuples: [(int, int, int), ...]): defaults to
                [(9, 9, 3)] when None
            env_file (list): non-empty sequence whose first element is a dict
                with the keys 'len_x', 'len_y', 'len_z', 'walls', 'num_room',
                'num_floor', 'room_to_locs', 'floor_to_rooms',
                'floor_to_locs' and 'room_to_floor'
            constraints (dict): logic formula of 'goal' and 'stay' for the
                reward function
                - goal (large positive), stay (zero), otherwise (large
                  negative)
                Defaults to {'goal': [], 'stay': []} when None. If it
                contains the key 'lowest', the constraints are replaced by
                {'goal': 'a', 'stay': 'b'} and ap_maps is rebuilt from
                ap_maps['a'] and the room of init_loc.
            ap_maps (dict): {ap_symbol: (category, state), ...}
                ex) {a: ('r', [1]), b: ('a', west)}
                category: floor(f), room(r), lowest level action(a),
                grid cells (c). Defaults to {} when None.

        Raises:
            ValueError: if env_file is None or empty.
        '''
        # Fresh objects per call: constraints and ap_maps are stored on the
        # instance, so shared default objects would leak between instances.
        if goal_locs is None:
            goal_locs = [(9, 9, 3)]
        if constraints is None:
            constraints = {'goal': [], 'stay': []}
        if ap_maps is None:
            ap_maps = {}

        # Without an environment there are no walls or room tables to build
        # on, so fail immediately with a clear error.
        if env_file is None or len(env_file) == 0:
            raise ValueError(
                'Fail to initialize RoomCubeMDP: env_file is empty')

        # Load environment file
        cube_env = env_file[0]
        len_x = cube_env['len_x']
        len_y = cube_env['len_y']
        len_z = cube_env['len_z']
        walls = cube_env['walls']
        self.num_room = cube_env['num_room']
        self.num_floor = cube_env['num_floor']
        self.room_to_locs = cube_env['room_to_locs']
        self.floor_to_rooms = cube_env['floor_to_rooms']
        self.floor_to_locs = cube_env['floor_to_locs']
        self.room_to_floor = cube_env['room_to_floor']

        CubeMDP.__init__(self,
                         len_x,
                         len_y,
                         len_z,
                         init_loc,
                         goal_locs=goal_locs,
                         walls=walls,
                         gamma=gamma,
                         slip_prob=slip_prob,
                         name=name,
                         is_goal_terminal=is_goal_terminal,
                         rand_init=rand_init,
                         step_cost=step_cost)

        if 'lowest' in constraints:
            self.constraints = {'goal': 'a', 'stay': 'b'}
            self.ap_maps = {
                'a': ap_maps['a'],
                'b': [1, 'state',
                      self.get_room_numbers(init_loc)[0]]
            }  # AP --> real world
        else:
            self.constraints = constraints  # constraints for LTL
            self.ap_maps = ap_maps

        # The initial q is evaluated at init_loc with an empty action string.
        init_state = RoomCubeState(init_loc[0], init_loc[1], init_loc[2],
                                   self._transition_q(init_loc, ""))
        if init_state.q != 0:
            init_state.set_terminal(True)

        MDP.__init__(self,
                     RoomCubeMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)