Example #1
 def get_all_possible_states(self):
     """
     Returns a list containing all the possible states in the MDP
     :return: List of GridWorldState
     """
     state_list = []
     walls = self.compute_walls()
     for col_idx, column in enumerate(range(1, self.height + 1, 1)):
         for row_idx, row in enumerate(range(self.width, 0, -1)):
             if (column, row) not in walls:
                 state = GridWorldState(column, row)
                 if (column, row) in self.goal_location:
                     state._is_terminal = True
                 state_list.append(state)
     return state_list
Example #2
    def __init__(self,
                 height=11,
                 width=11,
                 init_state=(1, 1),
                 gamma=0.99,
                 slip_prob=0.0,
                 goal_location=None,
                 goal_value=1.0,
                 build_walls=True
                 ):
        super().__init__(actions=list(Dir),
                         init_state=GridWorldState(init_state[0], init_state[1]),
                         gamma=gamma)
        self.height = height
        self.width = width
        self.slip_prob = slip_prob
        if goal_location is None:
            self.goal_location = [(width, height)]
        else:
            self.goal_location = goal_location
        self.goal_value = goal_value
        self.walls = []

        if build_walls:
            self.walls = self.compute_walls()

        self.hallway_states = [(3,6), (6,3), (6,8), (8,5)]
        self.int_rewards_received = []
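A minimal construction sketch, tying the constructor above to get_all_possible_states from Example #1; the module path is an assumption (only the TwoRoomsMDP and GridWorldState paths appear later, in Example #13):

# Hedged usage sketch; the module path below is assumed, not confirmed by these excerpts
from GridWorld.GridWorldMDPClass import GridWorldMDP  # assumed path

mdp = GridWorldMDP(slip_prob=0.1)       # default 11x11 grid; goal defaults to [(11, 11)]
states = mdp.get_all_possible_states()  # wall cells from compute_walls() are excluded
print(len(states), 'non-wall states')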
Example #3
 def visualizeLearnedPolicy(self, agent):
     """
     Shows best action learned at each state of the MDP
     :return:
     """
     screen = pygame.display.set_mode(
         [self.screen_width, self.screen_height])
     mdp_env = self.createAbstractGridWorldMDP()
     WIDTH_DIM = self.abstr_mdp.mdp.get_width()
     HEIGHT_DIM = self.abstr_mdp.mdp.get_height()
     walls = self.abstr_mdp.mdp.compute_walls()
     pygame.init()
     complete_viz = False
     while True:
         for event in pygame.event.get():
             if event.type == pygame.QUIT: sys.exit()
         if not complete_viz:
             for col_idx, column in enumerate(range(1, HEIGHT_DIM + 1, 1)):
                 for row_idx, row in enumerate(range(WIDTH_DIM, 0, -1)):
                     if (column, row) not in walls:
                         ground_state = GridWorldState(column, row)
                         abs_state = self.abstr_mdp.get_abstr_from_ground(
                             ground_state)
                         print("abs_state", abs_state)
                         best_action = agent.get_best_action(abs_state)
                         print(best_action)
                         action_img = self.createAction(best_action)
                         mdp_and_action = self.placeAction(
                             action_img, ground_state, mdp_env)
                         screen.blit(mdp_and_action, (0, 0))
                         pygame.display.flip()
         complete_viz = True
def test_rollout_adjustment(key):
    """
    Train the agent on a state abstraction with fatal errors. Then generate a roll-out, detach the first state that's
    part of a cycle, and restart learning.
    """
    # Load a poorly-performing abstraction
    names = ['AbstrType', 'AbstrEps', 'CorrType', 'CorrProp', 'Batch', 'Dict']
    df = pd.read_csv('../abstr_exp/corrupted/corrupted_abstractions.csv', names=names)
    abstr_string = df.loc[(df['AbstrType'] == str(key[0]))
                        & (df['AbstrEps'] == key[1])
                        & (df['CorrType'] == str(key[2]))
                        & (df['CorrProp'] == key[3])
                        & (df['Batch'] == key[4])]['Dict'].values[0]
    abstr_list = ast.literal_eval(abstr_string)
    abstr_dict = {}
    for el in abstr_list:
        is_term = el[0][0] == 11 and el[0][1] == 11
        state = GridWorldState(el[0][0], el[0][1], is_terminal=is_term)
        abstr_dict[state] = el[1]

    # Create an agent with this abstraction
    s_a = StateAbstraction(abstr_dict, abstr_type=Abstr_type.PI_STAR)
    mdp = GridWorldMDP()
    agent = AbstractionAgent(mdp, s_a=s_a)

    # This is useful for later
    agent2 = copy.deepcopy(agent)

    # Train for 5,000 steps, generate a roll-out, then train another 5,000 (10,000 total) and roll out again
    for i in range(5000):
        agent.explore()
    rollout = agent.generate_rollout()
    print('Roll-out for model with no adjustment, 5,000 steps')
    for state in rollout:
        print(state, end=', ')
    for i in range(5000):
        agent.explore()
    rollout = agent.generate_rollout()
    print('Roll-out for model with no adjustment, 10,000 steps')
    for state in rollout:
        print(state, end=', ')
    print('\n')

    # Train an agent for 5000 steps, detach the first state in the cycle, and train for another 5000 steps
    #  The hope is that this will get further than the 10000 step one
    for i in range(5000):
        agent2.explore()
    rollout = agent2.generate_rollout()
    print('Roll-out for model pre-adjustment, 5,000 steps')
    for state in rollout:
        print(state, end=', ')
    print()
    print('Detaching state', rollout[-1])
    agent2.detach_state(rollout[-1])
    for i in range(5000):
        agent2.explore()
    rollout = agent2.generate_rollout()
    print('Roll-out for model post-adjustment, 10,000 steps')
    for state in rollout:
        print(state, end=', ')
    def __call__(self, state, action, mdp):
        '''
        This needs access to the MDP parameters

        Parameters:
            state:GridWorldState
            action:Enum
            mdp:GridWorldMDP

        Returns:
            state:GridWorldState
        '''
        next_state = state

        # If terminal, do nothing
        if state.is_terminal():
            return next_state

        # Apply slip probability and change action if applicable
        if random.random() < self.slip_prob:
            if action in [Dir.UP, Dir.DOWN]:
                action = random.choice([Dir.LEFT, Dir.RIGHT])
            elif action in [Dir.LEFT, Dir.RIGHT]:
                action = random.choice([Dir.UP, Dir.DOWN])

        # Calculate next state based on action
        if action == Dir.UP and state.y < mdp.height and (state.x, state.y +
                                                          1) not in mdp.walls:
            next_state = GridWorldState(state.x, state.y + 1)
        if action == Dir.DOWN and state.y > 1 and (state.x, state.y -
                                                   1) not in mdp.walls:
            next_state = GridWorldState(state.x, state.y - 1)
        if action == Dir.LEFT and state.x > 1 and (state.x - 1,
                                                   state.y) not in mdp.walls:
            next_state = GridWorldState(state.x - 1, state.y)
        if action == Dir.RIGHT and state.x < mdp.width and (
                state.x + 1, state.y) not in mdp.walls:
            next_state = GridWorldState(state.x + 1, state.y)

        if (next_state.x, next_state.y) in mdp.goal_location:
            next_state.set_terminal(True)

        return next_state
Example #6
 def get_all_possible_states(self):
     """
     Create a list of all possible states in the MDP
     """
     state_list = []
     for x in range(1, self.total_width + 1):
         for y in range(1, self.total_height + 1):
             #print('Checking if', x, y, 'is a state')
             state = GridWorldState(x, y)
             if self.is_inside_rooms(state):
                 state_list.append(state)
         #print()
     return state_list
Example #7
    def transition(self, state, action):
        '''
        Parameters:
            state:GridWorldState
            action:Enum

        Returns:
            state:GridWorldState
        '''
        next_state = state

        # If MDP is already in the goal state, no actions should be available
        if self.is_goal_state(state):
            return state

        # Apply slip probability and change action if applicable
        if random.random() < self.slip_prob:
            if action in [Dir.UP, Dir.DOWN]:
                action = random.choice([Dir.LEFT, Dir.RIGHT])
            elif action in [Dir.LEFT, Dir.RIGHT]:
                action = random.choice([Dir.UP, Dir.DOWN])

        # Calculate next state based on action
        if action == Dir.UP and state.y < self.height and (state.x, state.y + 1) not in self.walls:
            next_state = GridWorldState(state.x, state.y + 1)
        if action == Dir.DOWN and state.y > 1 and (state.x, state.y - 1) not in self.walls:
            next_state = GridWorldState(state.x, state.y - 1)
        if action == Dir.LEFT and state.x > 1 and (state.x - 1, state.y) not in self.walls:
            next_state = GridWorldState(state.x - 1, state.y)
        if action == Dir.RIGHT and state.x < self.width and (state.x + 1, state.y) not in self.walls:
            next_state = GridWorldState(state.x + 1, state.y)

        # If the next state is a goal location, mark it terminal
        if (next_state.x, next_state.y) in self.goal_location:
            next_state.set_terminal(True)
        return next_state
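A short usage sketch of this transition method; the GridWorldMDP and Dir module paths are assumptions (only the GridWorldState path is confirmed by Example #13):

# Hedged sketch; 'assumed path' marks imports not confirmed by these excerpts
from GridWorld.GridWorldMDPClass import GridWorldMDP  # assumed path
from GridWorld.GridWorldStateClass import GridWorldState
from GridWorld.ActionEnums import Dir                  # assumed path

mdp = GridWorldMDP(slip_prob=0.0)            # deterministic transitions
next_state = mdp.transition(GridWorldState(1, 1), Dir.UP)
print(next_state, next_state.is_terminal())  # expected (1, 2) and False, unless a wall blocks the move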
Example #8
    def createAbstractGridWorldMDP(self):
        """
        Creates and returns a Pygame Surface from the Abstract MDP this class is initialized with.
        All cells that belong to the same abstract class are shown in the same color
        :return:
        """
        WIDTH_DIM = self.abstr_mdp.mdp.get_width()
        HEIGHT_DIM = self.abstr_mdp.mdp.get_height()
        rand_color = randomcolor.RandomColor()
        #dictionary of abstract state to colors
        abs_to_color = {}

        WINDOW_WIDTH = (self.cell_size + self.margin) * WIDTH_DIM + self.margin
        WINDOW_HEIGHT = (self.cell_size +
                         self.margin) * HEIGHT_DIM + self.margin
        screen = pygame.Surface((WINDOW_WIDTH, WINDOW_HEIGHT))
        window = pygame.Rect(0, 0, WINDOW_WIDTH, WINDOW_HEIGHT)
        walls = self.abstr_mdp.mdp.compute_walls()
        # draw background
        pygame.draw.rect(screen, BLACK, window)
        # draw cells

        for col_idx, column in enumerate(range(1, HEIGHT_DIM + 1, 1)):
            for row_idx, row in enumerate(range(WIDTH_DIM, 0, -1)):
                color = WHITE
                if (column, row) in walls:
                    color = BLACK
                else:
                    ground_state = GridWorldState(column, row)
                    abs_state = self.abstr_mdp.get_abstr_from_ground(
                        ground_state)
                    print("ground state", ground_state)
                    print("abstract state", abs_state)

                    if (abs_state in abs_to_color):
                        new_color = abs_to_color[abs_state]
                    else:
                        new_color = rand_color.generate()
                        while (new_color in abs_to_color.values()):
                            new_color = rand_color.generate()
                        abs_to_color[abs_state] = new_color
                    color = pygame.Color(new_color[0])
                pygame.draw.rect(
                    screen, color,
                    [(self.margin + self.cell_size) * (col_idx) + self.margin,
                     (self.margin + self.cell_size) *
                     (row_idx) + self.margin, self.cell_size, self.cell_size])
        return screen
def test_detach_state(agent):
    # Test that detach_state both removes the state from the abstraction dictionary and resets the Q-table to 0
    #  We select this state to remove since we are guaranteed to always interact with it
    state_to_remove = GridWorldState(1, 1)
    print('State and abstr state prior to detach:', state_to_remove, agent.s_a.abstr_dict[state_to_remove])
    print('Other states in this abstract state: ', end = '')
    for temp_state in agent.mdp.get_all_possible_states():
        if agent.s_a.abstr_dict[temp_state] == agent.s_a.abstr_dict[state_to_remove]:
            print(temp_state, end = ' ')
    print()
    for i in range(5000):
        agent.explore()
    print()
    print('Q-value of state after exploring: (should be non-zero)')
    for i in range(len(agent.mdp.actions)):
        print(agent.mdp.actions[i], agent.get_q_value(state_to_remove, agent.mdp.actions[i]))
    print()
    agent.detach_state(state_to_remove, reset_q_value=True)

    print('State and abstr state after detach:', state_to_remove, agent.s_a.abstr_dict[state_to_remove])
    print('Q-value of state after detaching: (should be zero)')
    for i in range(len(agent.mdp.actions)):
        print(agent.mdp.actions[i], agent.get_q_value(state_to_remove, agent.mdp.actions[i]))
    print()
    for i in range(5000):
        agent.explore()
    print('Q-value of state after exploring again: (should be non-zero)')
    for i in range(len(agent.mdp.actions)):
        print(agent.mdp.actions[i], agent.get_q_value(state_to_remove, agent.mdp.actions[i]))
    print('\n'*3)
    print('Full Q-table:')
    for key, value in agent.get_q_table().items():
        print(key[0], key[1], value)

    # Check that the ground -> abstr and abstr -> ground mappings correspond
    for key in agent.group_dict.keys():
        for state in agent.all_possible_states:
            if agent.s_a.abstr_dict[state] == key and state not in agent.group_dict[key]:
                print('Mismatch between ground->abstr and abstr->ground mappings:', key, state)
    print('Success!')
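A hedged driver for this test; it builds the agent the same way test_check_for_optimal_action_and_value does below and assumes GridWorldMDP, Abstr_type, and AbstractionAgent are imported as in the rest of the project:

# Hypothetical driver (imports assumed to be in scope, as in the original test module)
mdp = GridWorldMDP()
abstr_mdp = mdp.make_abstr_mdp(Abstr_type.Q_STAR)
agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)
test_detach_state(agent)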
def test_check_for_optimal_action_and_value(states, num_steps):
    """
    Train an agent on a Q* abstraction, then for each given state report the optimal action and value found by the greedy policy
    """
    mdp = GridWorldMDP()
    abstr_mdp = mdp.make_abstr_mdp(Abstr_type.Q_STAR)
    agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)
    for i in range(num_steps):
        if i % 1000 == 0:
            print('On step', i)
        agent.explore()

    # print(agent.get_learned_policy_as_string())
    policy = agent.get_learned_policy()
    #for key, value in agent.get_learned_policy_as_string().items():
    #    print(key, value, agent.get_q_value(key[0], key[1]))
    for s in agent.mdp.get_all_possible_states():
        #for a in agent.mdp.actions:
        print(s, agent.get_best_action_value(s))

    for state in states:
        mdp_state = GridWorldState(state[0], state[1])
        action, value = agent.check_for_optimal_action_value_next_state(mdp_state, verbose=True)
        print()
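A possible invocation; the state tuples are arbitrary illustrative coordinates:

# Hypothetical call: inspect two states after 100,000 exploration steps
test_check_for_optimal_action_and_value([(2, 1), (5, 5)], num_steps=100000)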
Example #11
    def transition(self, state, action):

        # If in goal state, no actions available
        if self.is_goal_state(state):
            return state

        # Apply slip probability
        if random.random() < self.slip_prob:
            if action in [Dir.UP, Dir.DOWN]:
                action = random.choice([Dir.LEFT, Dir.RIGHT])
            else:
                action = random.choice([Dir.UP, Dir.DOWN])

        # Start by assigning next_state to current_state. This way we only have to check for cases where action
        #  successfully changes states below
        next_state = state

        # Check if state is outside of the two rooms; if so action should have no effect
        if not self.is_inside_rooms(state):
            return next_state

        # Calculate next state for cases where action changes state; add +1 to upper_height to account for
        #  wall
        if action == Dir.UP:
            # If in lower room not against wall, or in lower room under hallway state, or in upper room
            #  not against wall, or in hallway
            '''
            if state.y < self.lower_height \
                    or (state.y == self.lower_height and state.x in self.hallway_states) \
                    or (self.upper_start_height <= state.y < self.total_height) \
                    or (self.lower_height < state.y < self.upper_start_height and state.x in self.hallway_states):
                next_state = GridWorldState(state.x, state.y + 1)
            '''
            next_state = GridWorldState(state.x, state.y + 1)
            if not self.is_inside_rooms(next_state):
                next_state = GridWorldState(state.x, state.y)

        elif action == Dir.DOWN:
            # In upper room not against wall, in upper room above hallway, or in lower room not against wall, or in
            #  hallway
            '''
            if (state.y > self.upper_start_height) \
                    or (state.y == self.upper_start_height and state.x in self.hallway_states) \
                    or (1 < state.y <= self.lower_height) \
                    or (self.lower_height < state.y < self.upper_start_height and state.x in self.hallway_states):
                next_state = GridWorldState(state.x, state.y - 1)
            '''
            next_state = GridWorldState(state.x, state.y - 1)
            if not self.is_inside_rooms(next_state):
                next_state = GridWorldState(state.x, state.y)

        elif action == Dir.LEFT:
            # In lower room not against wall, or upper room not against wall
            '''
            if (state.y <= self.lower_height and state.x > max(self.lower_offset + 1, 1)) \
                    or (state.y >= self.upper_start_height and state.x > max(self.upper_offset + 1, 1)):
                next_state = GridWorldState(state.x - 1, state.y)
            '''
            next_state = GridWorldState(state.x - 1, state.y)
            if not self.is_inside_rooms(next_state):
                next_state = GridWorldState(state.x, state.y)

        elif action == Dir.RIGHT:
            # In lower room not against wall, or upper room not against wall
            '''
            if (state.y <= self.lower_height and state.x < self.lower_width + self.lower_offset) \
                    or (state.y >= self.upper_start_height and state.x < self.upper_width + self.upper_offset):
                next_state = GridWorldState(state.x + 1, state.y)
            '''
            next_state = GridWorldState(state.x + 1, state.y)
            if not self.is_inside_rooms(next_state):
                next_state = GridWorldState(state.x, state.y)

        # If agent enters goal state, make next state terminal
        if (next_state.x, next_state.y) in self.goal_location:
            next_state.set_terminal(True)
        return next_state
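A small sketch of this transition logic on the compact two-rooms layout used in the test block further down; the Dir module path is an assumption, and the expected outcomes follow from the is_inside_rooms checks above:

# Hedged sketch; the TwoRoomsMDP and GridWorldState paths match Example #13, Dir's path is assumed
from GridWorld.TwoRoomsMDP import TwoRoomsMDP
from GridWorld.GridWorldStateClass import GridWorldState
from GridWorld.ActionEnums import Dir  # assumed path

mdp = TwoRoomsMDP(lower_width=3, upper_width=3, lower_height=3, upper_height=3,
                  hallway_states=[3], goal_location=[(1, 5)])
# Moving UP from (3, 3) should enter the hallway cell (3, 4); from (1, 3) the wall should block it
print(mdp.transition(GridWorldState(3, 3), Dir.UP))  # expected (3, 4)
print(mdp.transition(GridWorldState(1, 3), Dir.UP))  # expected (1, 3), unchanged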
Example #12
    def __init__(self,
                 upper_width=5,
                 upper_height=5,
                 lower_width=5,
                 lower_height=5,
                 upper_offset=0,
                 lower_offset=0,
                 init_state=(1, 1),
                 goal_location=None,
                 slip_prob=0.0,
                 goal_value=1.0,
                 hallway_states=[3],
                 hallway_height=1,
                 gamma=0.99):
        """
        :param upper_width: width (x-coordinate) of upper room
        :param upper_height: height (y-coordinate) of upper room
        :param lower_width: width of lower room
        :param lower_height: height (y-coordinate) of lower room
        :param upper_offset: shift upper room to the right by this value
        :param lower_offset: shift lower room to the right by this value
        :param init_state: starting state (x,y)
        :param goal_location: list of goal states [(x, y), ...]
        :param slip_prob: probability of taking a random perpendicular action instead of the selected action
        :param goal_value: reward on reaching goal
        :param hallway_states: list of x-coordinates through which the agent can move to get from
                one room to the other
        :param hallway_height: height (number of cells) of the hallway
        :param gamma: discount factor
        """
        super().__init__(actions=list(Dir),
                         init_state=GridWorldState(init_state[0],
                                                   init_state[1]),
                         gamma=gamma)
        lower_bound = min(upper_offset, lower_offset)
        upper_offset = upper_offset - lower_bound
        lower_offset = lower_offset - lower_bound
        self.upper_width = upper_width
        self.upper_height = upper_height
        self.lower_width = lower_width
        self.lower_height = lower_height
        self.upper_offset = upper_offset
        self.lower_offset = lower_offset
        self.goal_location = goal_location
        self.goal_value = goal_value
        self.slip_prob = slip_prob
        self.hallway_states = hallway_states
        self.hallway_height = hallway_height

        # Hallway states shouldn't be wider than either room
        #if max(self.hallway_states) > min(self.upper_width + self.upper_offset, self.lower_width + self.lower_offset) \
        #        or min(self.hallway_states) < min(self.upper_offset, self.lower_offset):
        #    raise ValueError('Hallway states extend beyond room widths ' + str(self.hallway_states) )

        # Some useful values
        self.total_height = self.lower_height + self.upper_height + self.hallway_height
        #print(self.lower_height, self.upper_height, self.hallway_height)
        self.total_width = max(self.lower_offset + self.lower_width,
                               self.upper_offset + self.upper_width)
        self.upper_start_height = self.lower_height + self.hallway_height + 1
        #print('In MDP. total_width, total_height =', self.total_width, self.total_height)

        # If no goal location given, make goal location be the upper right hand corner of the upper room; if there is
        #  no upper room, make it upper-right hand corner of lower room
        if self.goal_location is None:
            if self.upper_width > 0 and self.upper_height > 0:
                self.goal_location = [(self.upper_width + self.upper_offset,
                                       self.total_height)]
            else:
                self.goal_location = [(self.lower_offset + self.lower_width,
                                       self.lower_height)]
        # If goal location is outside rooms, raise value error
        for loc in self.goal_location:
            if not self.is_inside_rooms(GridWorldState(loc[0], loc[1])):
                raise ValueError('Goal location is outside rooms ' +
                                 str([loc for loc in self.goal_location]))
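A brief sketch of the defaults described above; the expected values follow directly from the arithmetic in this constructor (TwoRoomsMDP import as in Example #13):

# With no explicit goal, the goal defaults to the upper-right corner of the upper room
from GridWorld.TwoRoomsMDP import TwoRoomsMDP

mdp = TwoRoomsMDP()                       # two 5x5 rooms joined by a 1-cell-high hallway at x=3
print(mdp.total_width, mdp.total_height)  # expected: 5 11
print(mdp.goal_location)                  # expected: [(5, 11)]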
Example #13
from GridWorld.TwoRoomsMDP import TwoRoomsMDP
from GridWorld.GridWorldStateClass import GridWorldState
from MDP.ValueIterationClass import ValueIteration

if __name__ == '__main__':

    test_num = 8

    # (1) Check that each state-action combination on the default arguments yields the expected results
    if test_num == 1:
        mdp = TwoRoomsMDP()
        print('Checking all state-action combos')
        # 5 squares wide, 11 squares tall (including hallway)
        for x in range(1, 20):
            for y in range(1, 20):
                if mdp.is_inside_rooms(GridWorldState(x, y)):
                    for action in mdp.actions:
                        state = GridWorldState(x, y)
                        next_state = mdp.transition(state, action)
                        if state != next_state:
                            print(state, action, next_state)
                    print()

    # (2) Upper offset
    elif test_num == 2:
        mdp = TwoRoomsMDP(upper_offset=1)
        for x in range(1, 20):
            for y in range(1, 20):
                if mdp.is_inside_rooms(GridWorldState(x, y)):
                    for action in mdp.actions:
                        state = GridWorldState(x, y)
        # Add group dict (for detachment)
        self.group_dict = self.reverse_abstr_dict(self.s_a.abstr_dict)


# Testing use only
if __name__ == '__main__':

    # Create environment
    mdp = TwoRoomsMDP(lower_width=3,
                      upper_width=3,
                      lower_height=3,
                      upper_height=3,
                      hallway_states=[3],
                      goal_location=[(1, 5)])
    error_dict = {
        GridWorldState(1, 2): GridWorldState(2, 5),
        GridWorldState(3, 3): GridWorldState(1, 6)
    }

    ABSTR_TYPE = Abstr_type.Q_STAR
    ERROR_NUM = 6

    mdp = GridWorldMDP()
    if ABSTR_TYPE == Abstr_type.Q_STAR:
        abstr_mdp = mdp.make_abstr_mdp(Abstr_type.Q_STAR)
        if ERROR_NUM == 1:
            error_dict = {
                GridWorldState(6, 3): GridWorldState(10, 9),
                GridWorldState(9, 10): GridWorldState(9, 3)
            }
        elif ERROR_NUM == 2:
def iterate_detachment(mdp_key, batch_size=5000):
    """
    Load an incorrect abstraction. Train the model, generate a roll-out, and detach the first cycle state. Repeat until
    the roll-out reaches a terminal state. Save the adjusted abstraction and learned policy. Visualize the original
    incorrect abstraction with roll-outs from the original agents, and the adjusted abstraction with a roll-out from the
    new agent.
    :param mdp_key: key for the incorrect (poorly-performing) abstraction
    :param batch_size: Number of steps to train between state detachments
    """
    # Load a poorly-performing abstraction
    names = ['AbstrType', 'AbstrEps', 'CorrType', 'CorrProp', 'Batch', 'Dict']
    df = pd.read_csv('../abstr_exp/corrupted/corrupted_abstractions.csv', names=names)
    abstr_string = df.loc[(df['AbstrType'] == str(mdp_key[0]))
                        & (df['AbstrEps'] == mdp_key[1])
                        & (df['CorrType'] == str(mdp_key[2]))
                        & (df['CorrProp'] == mdp_key[3])
                        & (df['Batch'] == mdp_key[4])]['Dict'].values[0]
    abstr_list = ast.literal_eval(abstr_string)
    abstr_dict = {}
    for el in abstr_list:
        is_term = el[0][0] == 11 and el[0][1] == 11
        state = GridWorldState(el[0][0], el[0][1], is_terminal=is_term)
        abstr_dict[state] = el[1]

    # Create an agent with this abstraction
    s_a = StateAbstraction(abstr_dict, abstr_type=Abstr_type.PI_STAR)
    mdp = GridWorldMDP()
    agent = AbstractionAgent(mdp, s_a=s_a)

    # Generate a roll-out from untrained model (should be random and short)
    rollout = agent.generate_rollout()
    print('Roll-out from untrained model')
    for state in rollout:
        print(state, end=', ')
    print()

    # Until roll-out leads to terminal state, explore and detach last state of roll-out. Record each of the detached
    #  states so they can be visualized later
    detached_states = []
    step_counter = 0
    while not rollout[-1].is_terminal():
        for i in range(batch_size):
            agent.explore()
        step_counter += batch_size
        rollout = agent.generate_rollout()
        print('Roll-out after', step_counter, 'steps')
        for state in rollout:
            print(state, end=', ')
        print()
        print('State Q-value pre-detach:')
        for action in agent.mdp.actions:
            print(rollout[-1], action, agent.get_q_value(rollout[-1], action))
        detach_flag = agent.detach_state(rollout[-1])
        if detach_flag == 0:
            print('Detaching state', rollout[-1])
            detached_states.append(rollout[-1])
        elif detach_flag == 1:
            print(rollout[-1], 'already a singleton state. No change.')
        print('State Q-value post-detach:')
        for action in agent.mdp.actions:
            print(rollout[-1], action, agent.get_q_value(rollout[-1], action))
        print()
    for key, value in agent.get_q_table().items():
        print(key, value)

    # Save resulting adapted state abstraction and learned policy
    s_a_file = open('../abstr_exp/adapted/adapted_abstraction.csv', 'w', newline='')
    s_a_writer = csv.writer(s_a_file)
    print(mdp_key)
    s_a_writer.writerow((mdp_key[0], mdp_key[1], mdp_key[2], mdp_key[3], mdp_key[4], agent.get_abstraction_as_string()))
    s_a_file.close()

    policy_file = open('../abstr_exp/adapted/learned_policy.csv', 'w', newline='')
    policy_writer = csv.writer(policy_file)
    policy_writer.writerow((mdp_key[0], mdp_key[1], mdp_key[2], mdp_key[3], mdp_key[4],
                            agent.get_learned_policy_as_string()))
    policy_file.close()

    # Visualize the adapted state abstraction and learned policy, along with the original for comparison
    viz = GridWorldVisualizer()
    surface = viz.create_corruption_visualization(mdp_key,
                                                  '../abstr_exp/adapted/adapted_abstraction.csv',
                                                  error_file='../abstr_exp/corrupted/error_states.csv')
    # Draw small white circles over the states that were detached
    for state in detached_states:
        print(state, end=', ')
    #for d_state in
    viz.display_surface(surface)
Example #16
    '''

    # True A-star with episode buffer
    '''
    test_udm(mdp, Abstr_type.A_STAR, EPISODE_COUNT, episode_buffer=10)
    quit()          
    '''

    # True Pi-Star with episode buffer
    '''
    test_udm(mdp, Abstr_type.PI_STAR, EPISODE_COUNT, episode_buffer=20)
    quit()
    '''

    # Bad error 1
    error_dict = {GridWorldState(1, 2): GridWorldState(2, 5)}

    # Q-Star with bad error 1
    '''
    test_udm(mdp,
             Abstr_type.Q_STAR,
             EPISODE_COUNT,
             error_dict=error_dict,
             episode_buffer=10)
    quit()
    '''

    # A-star with bad error 1
    '''
    test_udm(mdp, Abstr_type.A_STAR, EPISODE_COUNT, error_dict=error_dict)
    quit() 
Example #17
    def get_next_possible_states(self, state, action):
        """
        Get a dictionary (State -> float) mapping each possible next state to the probability that it
        is reached from the given (state, action) pair
        """
        next_state_probs = {}

        if self.is_goal_state(state):
            next_state_probs[state] = 1
            return next_state_probs

        up_state = GridWorldState(state.x, state.y + 1)
        down_state = GridWorldState(state.x, state.y - 1)
        left_state = GridWorldState(state.x - 1, state.y)
        right_state = GridWorldState(state.x + 1, state.y)
        # can the agent move left?
        left_cond = self.is_inside_rooms(GridWorldState(state.x - 1, state.y))
        # can the agent move right?
        right_cond = self.is_inside_rooms(GridWorldState(state.x + 1, state.y))
        # can the agent move down?
        down_cond = self.is_inside_rooms(GridWorldState(state.x, state.y - 1))
        # can the agent move up
        up_cond = self.is_inside_rooms(GridWorldState(state.x, state.y + 1))

        # Set next_state_probs for current state so it can be incremented later
        next_state_probs[state] = 0

        # I'm sure there's a cleaner way to do this but what the hell
        if action == Dir.UP:
            if (up_cond):
                next_state_probs[up_state] = 1 - self.slip_prob
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip right or left
            if (left_cond):
                next_state_probs[left_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (right_cond):
                next_state_probs[right_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.DOWN:
            if (down_cond):
                next_state_probs[down_state] = (1 - self.slip_prob)
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip right or left
            if (left_cond):
                next_state_probs[left_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (right_cond):
                next_state_probs[right_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.LEFT:
            if (left_cond):
                next_state_probs[left_state] = (1 - self.slip_prob)
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip up or down
            if (up_cond):
                next_state_probs[up_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (down_cond):
                next_state_probs[down_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.RIGHT:
            if (right_cond):
                next_state_probs[right_state] = 1 - self.slip_prob
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip up or down
            if (up_cond):
                next_state_probs[up_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (down_cond):
                next_state_probs[down_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2

        # In the end remove keys whose value is 0
        next_state_probs = {k: v for k, v in next_state_probs.items() if v}
        return next_state_probs
Example #18
    def get_next_possible_states(self, state, action):
        """
        Part of the model used for value iteration: given a state and an action, return a dictionary mapping each
        state the agent can end up in to the probability of reaching it with that action
        :param state: State
        :param action: ActionEnum
        :return: dictionary of State->float; the probabilities sum to one
        """
        next_state_probs = {}

        # if we are in the goal state, every action will take us back to the goal state
        if (self.is_goal_state(state)):
            next_state_probs[state] = 1
            return next_state_probs

        # set the probability of ending back at the current state as 0, so it can be incremented later
        next_state_probs[state] = 0

        up_state = GridWorldState(state.x, state.y + 1)
        down_state = GridWorldState(state.x, state.y - 1)
        left_state = GridWorldState(state.x - 1, state.y)
        right_state = GridWorldState(state.x + 1, state.y)
        # can the agent move left?
        left_cond = (state.x > 1 and (state.x - 1, state.y) not in self.walls)
        # can the agent move right?
        right_cond = (state.x < self.width and (state.x + 1, state.y) not in self.walls)
        # can the agent move down?
        down_cond = (state.y > 1 and (state.x, state.y - 1) not in self.walls)
        # can the agent move up
        up_cond = (state.y < self.height and (state.x, state.y + 1) not in self.walls)
        if action == Dir.UP:
            if (up_cond):
                next_state_probs[up_state] = 1 - self.slip_prob
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip right or left
            if (left_cond):
                next_state_probs[left_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (right_cond):
                next_state_probs[right_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.DOWN:
            if (down_cond):
                next_state_probs[down_state] = (1 - self.slip_prob)
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip right or left
            if (left_cond):
                next_state_probs[left_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (right_cond):
                next_state_probs[right_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.LEFT:
            if (left_cond):
                next_state_probs[left_state] = (1 - self.slip_prob)
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip up or down
            if (up_cond):
                next_state_probs[up_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (down_cond):
                next_state_probs[down_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
        elif action == Dir.RIGHT:
            if (right_cond):
                next_state_probs[right_state] = 1 - self.slip_prob
            else:
                next_state_probs[state] += (1 - self.slip_prob)
            # what if it slips?: it would either slip up or down
            if (up_cond):
                next_state_probs[up_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2
            if (down_cond):
                next_state_probs[down_state] = self.slip_prob / 2
            else:
                next_state_probs[state] += self.slip_prob / 2

        # In the end remove keys whose value is 0
        next_state_probs = {k: v for k, v in next_state_probs.items() if v}
        return next_state_probs
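A quick sanity check on the returned distribution; for any non-goal state the probabilities should sum to one (GridWorldMDP, GridWorldState, and Dir imported as in the earlier sketches, with the same assumed paths):

# Hedged check: the next-state probabilities should form a proper distribution
mdp = GridWorldMDP(slip_prob=0.1)
probs = mdp.get_next_possible_states(GridWorldState(1, 1), Dir.UP)
for next_state, p in probs.items():
    print(next_state, p)
print('total probability:', sum(probs.values()))  # expected 1.0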