def __init__(self, width=5, height=3, init_loc=(1, 1), rand_init=False,
             goal_locs=None, lava_locs=None, walls=None,
             is_goal_terminal=True, gamma=0.99, slip_prob=0.0,
             step_cost=0.0, lava_cost=0.01, goal_rewards=None,
             name="Grid-world"):
    '''
    Grid-world MDP with per-goal rewards.

    Args:
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to sample the initial location randomly.
        goal_locs (list of tuples): Goal (x, y) locations. Defaults to [(5, 3)].
        lava_locs (list of tuples): Lava (x, y) locations. Defaults to [()].
        walls (list of tuples): Wall (x, y) locations. Defaults to [].
        is_goal_terminal (bool): Whether reaching a goal ends the episode.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping (action failure).
        step_cost (float): Cost applied on every step.
        lava_cost (float): Cost of stepping onto a lava cell.
        goal_rewards (list of floats): Reward per goal in @goal_locs.
            Defaults to [1.].
        name (str): MDP name.
    '''
    # Fix: replace mutable default arguments with None sentinels so the
    # default lists are not shared across instances.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs
    lava_locs = [()] if lava_locs is None else lava_locs
    walls = [] if walls is None else walls
    goal_rewards = [1.] if goal_rewards is None else goal_rewards

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=lava_locs,
                          walls=walls,
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          lava_cost=lava_cost,
                          name=name)
    self.goal_rewards = goal_rewards
    # NOTE(review): flag presumably makes slips move along one fixed
    # direction — inferred from the name; confirm against the class's
    # transition function.
    self.slip_unidirectional = True
def __init__(self, col_sq_locs_dict, width=5, height=3, init_loc=(1, 1),
             goal_locs=None):
    '''
    Colored grid-world MDP: each cell carries a color looked up from
    @col_sq_locs_dict.

    Args:
        col_sq_locs_dict (dict):
            Key: int (x coordinate)
            Val: dict
                Key: int (y coordinate)
                Val: color
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple): Initial (x, y) location.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
    '''
    # Fix: avoid a mutable default argument shared across instances.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs

    GridWorldMDP.__init__(self, width, height, init_loc=init_loc,
                          goal_locs=goal_locs)
    # Rebuild the initial state so it carries the color of the start cell.
    self.init_state = ColoredGridWorldState(
        init_loc[0],
        init_loc[1],
        col_sq_locs_dict[init_loc[0]][init_loc[1]])
    self.col_sq_locs_dict = col_sq_locs_dict
def __init__(self, width=5, height=3, init_loc=(1, 1), rand_init=False,
             goal_locs=None, lava_locs=None, walls=None, safe_locs=None,
             is_goal_terminal=True, gamma=0.99, slip_prob=0.0,
             step_cost=0.0, lava_cost=1, name="gridworld", gui=False):
    '''
    Grid-world MDP with unsafe (lava) cells, a jump action, and an
    optional pygame GUI.

    Args:
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to sample the initial location randomly.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
        lava_locs (list of tuples): Lava locations. Defaults to [()].
        walls (list of tuples): Wall locations. Defaults to [].
        safe_locs (list of tuples): (x, y) locations considered safe.
            Defaults to [].
        is_goal_terminal (bool): Whether goals are terminal.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        lava_cost (float): Cost of stepping onto lava.
        name (str): MDP name.
        gui (bool): Whether to open a pygame window and draw the state.
    '''
    # Fix: replace mutable default arguments with None sentinels.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs
    lava_locs = [()] if lava_locs is None else lava_locs
    walls = [] if walls is None else walls
    safe_locs = [] if safe_locs is None else safe_locs

    GridWorldMDP.__init__(self, width, height, init_loc, rand_init,
                          goal_locs, lava_locs, walls, is_goal_terminal,
                          gamma, slip_prob, step_cost, lava_cost, name)
    self.jump_dist = 2
    self.actions = UnsafeGridWorldMDP.ACTIONS

    # Collect the states whose (x, y) location is flagged safe.
    self.safe_states = set()
    for state in self.get_all_states():
        if (state.x, state.y) in safe_locs:
            self.safe_states.add(state)

    self.gui = gui
    if gui:
        # Pygame setup.
        # Fix: initialize pygame before creating the display surface;
        # the original called set_mode() first and init() after.
        pygame.init()
        # NOTE(review): both dimensions use SCREEN_HEIGHT (square
        # window) — preserved from the original; confirm intentional.
        self.screen = pygame.display.set_mode(
            (SCREEN_HEIGHT, SCREEN_HEIGHT))
        self.agent_shape = None
        self.screen.fill((255, 255, 255))
        pygame.display.update()
        self.agent_shape = self._draw_state(self.init_state,
                                            draw_statics=True)
def __init__(self, col_sq_locs_dict, width=5, height=3, init_loc=(1, 1),
             goal_locs=None):
    '''
    Colored grid-world MDP whose initial state includes the color of the
    starting cell.

    Args:
        col_sq_locs_dict (dict):
            Key: int (x coordinate)
            Val: dict
                Key: int (y coordinate)
                Val: color
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple): Initial (x, y) location.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
    '''
    # Fix: avoid a mutable default argument shared across instances.
    if goal_locs is None:
        goal_locs = [(5, 3)]

    GridWorldMDP.__init__(self, width, height, init_loc=init_loc,
                          goal_locs=goal_locs)
    # Replace the plain initial state with one carrying the start color.
    start_color = col_sq_locs_dict[init_loc[0]][init_loc[1]]
    self.init_state = ColoredGridWorldState(init_loc[0], init_loc[1],
                                            start_color)
    self.col_sq_locs_dict = col_sq_locs_dict
def __init__(self,
             width=30,
             height=30,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             cell_distribution="probability",
             cell_type_probs=None,
             cell_type_forced_locations=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_rewards=None,
             is_goal_terminal=True,
             traj_init_cell_types=None,
             goal_colors=None,
             init_loc=(1, 1),
             rand_init=True,
             init_state=None,
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Height of navigation grid in no. of cells.
        width (int): Width of navigation grid in no. of cells.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        cell_distribution (str):
            "probability" - assign cells according to @cell_type_probs
            over the state space.
            "manual" - use @cell_type_forced_locations to assign cells
            to locations.
        cell_type_probs (list of floats): Only applicable when
            @cell_distribution is "probability". Probability of each
            @cell_types entry; values must sum to 1. Note: actual
            probabilities will be slightly off because this doesn't
            factor in the number of goals. Defaults to
            [0.68, 0.17, 0.05, 0.05, 0.05].
        cell_type_forced_locations (list of [(x, y), ...] or np.inf):
            Only applicable when @cell_distribution is "manual". np.inf
            entries are sampled uniformly at random.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_rewards (list of float): Reward per goal. Defaults to [1.0].
        is_goal_terminal (bool): Whether goals are terminal.
        traj_init_cell_types (list of int): Navigable cell types, used
            to sample init states when generating trajectories.
            Defaults to [0].
        goal_colors (list of str/int): Color of each goal in @goal_locs.
            Distinct colors get unique features; identical colors map to
            the same feature. Defaults to ["blue"].
        init_loc (tuple: (int, int)): (x, y) initial location.
        rand_init (bool): Whether to use a random initial location.
        init_state (GridWorldState): Initial state.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels so
    # instances never share default lists.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]
    if cell_type_probs is None:
        # Default is chosen arbitrarily larger than the percolation
        # threshold for a square lattice, which is just an approximation
        # to match the cell distribution with that of the paper.
        cell_type_probs = [0.68, 0.17, 0.05, 0.05, 0.05]
    if cell_type_forced_locations is None:
        # NOTE(review): the last entry [4, 4] is a bare list rather than
        # a list of tuples like the others — preserved as-is; confirm
        # whether [(4, 4)] was intended.
        cell_type_forced_locations = [np.inf, np.inf,
                                      [(1, 1), (5, 5)], [(2, 2)], [4, 4]]
    if goal_rewards is None:
        goal_rewards = [1.0]
    if traj_init_cell_types is None:
        traj_init_cell_types = [0]
    if goal_colors is None:
        goal_colors = ["blue"]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    assert len(goal_colors) == len(goal_locs) == len(goal_rewards)
    assert len(cell_types) == len(cell_type_rewards)
    assert cell_distribution == "manual" \
        or len(cell_types) == len(cell_type_probs)
    assert cell_distribution == "probability" \
        or len(cell_types) == len(cell_type_forced_locations)

    self.value_iter = None
    self._policy_invalidated = True
    self.cell_types = cell_types

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Cell types: 2d grid where each element holds a cell-type id.
    self.cells = self.__generate_cell_type_grid(
        height, width, cell_distribution, cell_type_probs,
        cell_type_forced_locations)
    # Preserve a copy without goals.
    self.cells_wo_goals = self.cells.copy()

    # Cell rewards: reward of each grid cell, looked up via its type.
    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[self.cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    # Preserve a copy without goals.
    self.cell_rewards_wo_goals = self.cell_rewards.copy()

    # Update cells and cell_rewards with goals and their rewards.
    self.reset_goals(goal_locs, goal_rewards, goal_colors)

    # Find set of empty/navigable cells for sampling trajectory init
    # states.
    self.set_traj_init_cell_types(cell_types=traj_init_cell_types)

    # Additional book-keeping.
    self.feature_cell_dist = None
    self.feature_cell_dist_normalized = None
def __init__(self,
             width=30,
             height=30,
             init_loc=(1, 1),
             rand_init=True,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             additional_obstacles=None,  # for additional experimentation only
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_reward=1.0,
             is_goal_terminal=True,
             init_state=None,
             vacancy_prob=0.8,
             sample_cell_types=None,
             use_goal_dist_feature=True,
             goal_color="blue",
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Grid height in cells.
        width (int): Grid width in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to use a random initial location.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        additional_obstacles (dict): Maps a cell-type id to a list of
            (x, y) locations forced to that type. When non-empty, the
            random grid starts all-empty. Defaults to {}.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_reward (float): Reward assigned to each goal cell.
        is_goal_terminal (bool): Whether goals are terminal.
        init_state (GridWorldState): Initial state.
        vacancy_prob (float): Controls the fraction of navigable cells.
        sample_cell_types (list of int): Cell types from which initial
            states may be sampled. Defaults to [0].
        use_goal_dist_feature (bool): Whether to include a
            goal-distance feature.
        goal_color (str): Color used to render goals.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]
    if additional_obstacles is None:
        additional_obstacles = {}
    if sample_cell_types is None:
        sample_cell_types = [0]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    self.cell_types = cell_types

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Probability of each cell type.
    if len(additional_obstacles) > 0:
        # Manual obstacles: start from an all-empty random grid.
        self.cell_prob = np.zeros(len(self.cell_types))
        self.cell_prob[0] = 1.
    else:
        # Can't say more about these numbers (chosen arbitrarily larger
        # than the percolation threshold for a square lattice). This is
        # just an approximation (to match the cell distribution with
        # that of the paper); it is not the primary concern here.
        self.cell_prob = [8. * vacancy_prob / 10.,
                          2 * vacancy_prob / 10.] \
                         + [(1 - vacancy_prob) / 3.] * 3

    # Matrix for identifying cell type and associated reward.
    self.cells = np.random.choice(len(self.cell_types),
                                  p=self.cell_prob,
                                  size=height * width).reshape(height,
                                                               width)

    # Overwrite the sampled grid with any forced obstacle locations.
    self.additional_obstacles = additional_obstacles
    for obs_type, obs_locs in self.additional_obstacles.items():
        for obs_loc in obs_locs:
            row, col = self._xy_to_rowcol(obs_loc[0], obs_loc[1])
            self.cells[row, col] = obs_type

    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    self.goal_reward = goal_reward

    # Set goals and their rewards in the matrix.
    for g in goal_locs:
        g_r, g_c = self._xy_to_rowcol(g[0], g[1])
        # Allocate the next (unused) cell-type id to the goal.
        self.cells[g_r, g_c] = len(self.cell_types)
        self.cell_rewards[g_r, g_c] = self.goal_reward

    self.goal_locs = goal_locs
    self.use_goal_dist_feature = use_goal_dist_feature
    self.goal_color = goal_color
    self.feature_cell_dist = None
    self.feature_cell_dist_normalized = None
    self.value_iter = None
    self.define_sample_cells(cell_types=sample_cell_types)
def __init__(self,
             width=30,
             height=30,
             init_loc=(1, 1),
             rand_init=True,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_reward=1.0,
             is_goal_terminal=True,
             init_state=None,
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Grid height in cells.
        width (int): Grid width in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to use a random initial location.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_reward (float): Reward assigned to each goal cell.
        is_goal_terminal (bool): Whether goals are terminal.
        init_state (GridWorldState): Initial state.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    self.cell_types = cell_types

    # Probability of each cell type.
    vacancy_prob = 0.8
    # Can't say more about these numbers (chosen arbitrarily larger than
    # the percolation threshold for a square lattice). This is just an
    # approximation; the paper isn't concerned about cell probabilities
    # and doesn't mention them.
    self.cell_prob = [4. * vacancy_prob / 5., vacancy_prob / 5.] \
                     + [(1 - vacancy_prob) / 3.] * 3

    # Matrix for identifying cell type and associated reward.
    self.cells = np.random.choice(len(self.cell_types),
                                  p=self.cell_prob,
                                  size=height * width).reshape(height,
                                                               width)
    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    self.goal_reward = goal_reward

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Set goals and their rewards in the matrix.
    for g in goal_locs:
        g_r, g_c = self._xy_to_rowcol(g[0], g[1])
        # Allocate the next (unused) cell-type id to the goal.
        self.cells[g_r, g_c] = len(self.cell_types)
        self.cell_rewards[g_r, g_c] = self.goal_reward
    self.goal_locs = goal_locs
    self.feature_cell_dist = None
def __init__(self,
             width=30,
             height=30,
             living_cell_types=None,
             living_cell_rewards=None,
             living_cell_distribution="probability",
             living_cell_type_probs=None,
             living_cell_locs=None,
             goal_cell_locs=None,
             goal_cell_rewards=None,
             goal_cell_types=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             is_goal_terminal=True,
             traj_init_cell_types=None,
             planning_init_loc=(1, 1),
             planning_rand_init=True,
             name="Navigation MDP"):
    """
    Note:
        Locations are specified in (x, y) format, but the (row, col)
        convention is used while storing in memory.

    Args:
        height (int): Height of navigation grid in no. of cells.
        width (int): Width of navigation grid in no. of cells.
        living_cell_types (list of str): Non-goal cell types. Defaults
            to ["empty", "yellow", "red", "green", "purple"].
        living_cell_rewards (list of int): Reward for each @cell_type.
            Defaults to [0, 0, -10, -10, -10].
        living_cell_distribution (str):
            "probability" - assign cells according to
            @living_cell_type_probs.
            "manual" - use @living_cell_locs to assign cells to the
            state space.
        living_cell_type_probs (list of floats): Probability of each
            @living_cell_types entry. Note: goals aren't factored in, so
            actual probabilities can be off. Defaults to
            [0.68, 0.17, 0.05, 0.05, 0.05], chosen arbitrarily larger
            than the percolation threshold for a square lattice — an
            approximation to match the paper's cell distribution.
        living_cell_locs (list of [(x, y), ...] or np.inf): Living cell
            locations; np.inf entries are sampled uniformly at random.
        goal_cell_locs (list of tuples: [(int, int)...]): Goal
            locations. Defaults to [].
        goal_cell_rewards (list of int): Goal rewards. Defaults to [].
        goal_cell_types (list of str/int): Type of each goal in
            @goal_cell_locs. Defaults to [].
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        is_goal_terminal (bool): Whether goals are terminal.
        traj_init_cell_types (list of int): Trajectory init state
            sampling cell types. Defaults to [0].
        planning_init_loc (tuple: (int, int)): Initial (x, y) location
            used for planning.
        planning_rand_init (bool): Whether planning uses random initial
            locations.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if living_cell_types is None:
        living_cell_types = ["empty", "yellow", "red", "green", "purple"]
    if living_cell_rewards is None:
        living_cell_rewards = [0, 0, -10, -10, -10]
    if living_cell_type_probs is None:
        living_cell_type_probs = [0.68, 0.17, 0.05, 0.05, 0.05]
    if living_cell_locs is None:
        # NOTE(review): the last entry [4, 4] is a bare list rather than
        # a list of tuples like the others — preserved as-is; confirm
        # whether [(4, 4)] was intended.
        living_cell_locs = [np.inf, np.inf,
                            [(1, 1), (5, 5)], [(2, 2)], [4, 4]]
    if goal_cell_locs is None:
        goal_cell_locs = []
    if goal_cell_rewards is None:
        goal_cell_rewards = []
    if goal_cell_types is None:
        goal_cell_types = []
    if traj_init_cell_types is None:
        traj_init_cell_types = [0]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    assert len(living_cell_types) == len(living_cell_rewards)
    assert living_cell_distribution == "manual" \
        or len(living_cell_types) == len(living_cell_type_probs)
    assert living_cell_distribution == "probability" \
        or len(living_cell_types) == len(living_cell_locs)
    assert len(goal_cell_types) == len(goal_cell_locs) \
        == len(goal_cell_rewards)

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=planning_init_loc,
                          rand_init=planning_rand_init,
                          goal_locs=goal_cell_locs,
                          lava_locs=[()],
                          walls=[],
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=None,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Living (navigation) cell types (str) and ids.
    self.living_cell_types = living_cell_types
    self.living_cell_ids = list(range(len(living_cell_types)))

    # State space: 2d grid where each element holds a cell id.
    self.state_space = self.__generate_state_space(
        height, width, living_cell_distribution,
        living_cell_type_probs, living_cell_locs)
    # Preserve a copy without goals.
    self.state_space_wo_goals = self.state_space.copy()

    # Rewards: per-cell reward looked up via the cell's type id.
    self.living_cell_rewards = living_cell_rewards
    self.state_rewards = np.asarray(
        [[self.living_cell_rewards[item] for item in row]
         for row in self.state_space]).reshape(height, width)
    # Preserve a copy without goals.
    self.state_rewards_wo_goals = self.state_rewards.copy()

    # Update cells and cell_rewards with goals and their rewards.
    self.reset_goals(goal_cell_locs, goal_cell_rewards, goal_cell_types)

    # Find set of empty/navigable cells for sampling trajectory init
    # states.
    self.set_traj_init_cell_types(cell_types=traj_init_cell_types)

    # Run value iteration.
    self.value_iter = ValueIteration(self, sample_rate=1)

    # Additional book-keeping.
    self.feature_cell_dist = None
    self.feature_cell_dist_kind = 0