def __init__(self, width=5, height=3, init_loc=(1, 1), rand_init=False,
             goal_locs=None, lava_locs=None, walls=None,
             is_goal_terminal=True, gamma=0.99, slip_prob=0.0,
             step_cost=0.0, lava_cost=0.01, goal_rewards=None,
             name="Grid-world"):
    '''
    Grid-world MDP with per-goal rewards.

    Args:
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to sample the initial location randomly.
        goal_locs (list of tuples): Goal (x, y) locations. Defaults to [(5, 3)].
        lava_locs (list of tuples): Lava (x, y) locations. Defaults to [()].
        walls (list of tuples): Wall (x, y) locations. Defaults to [].
        is_goal_terminal (bool): Whether reaching a goal ends the episode.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping (action failure).
        step_cost (float): Cost applied on every step.
        lava_cost (float): Cost of stepping onto a lava cell.
        goal_rewards (list of floats): Reward per goal in @goal_locs.
            Defaults to [1.].
        name (str): MDP name.
    '''
    # Fix: replace mutable default arguments with None sentinels so the
    # default lists are not shared across instances.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs
    lava_locs = [()] if lava_locs is None else lava_locs
    walls = [] if walls is None else walls
    goal_rewards = [1.] if goal_rewards is None else goal_rewards

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=lava_locs,
                          walls=walls,
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          lava_cost=lava_cost,
                          name=name)
    self.goal_rewards = goal_rewards
    # NOTE(review): flag presumably makes slips move along one fixed
    # direction — inferred from the name; confirm against the class's
    # transition function.
    self.slip_unidirectional = True
def __init__(self, col_sq_locs_dict, width=5, height=3, init_loc=(1, 1),
             goal_locs=None):
    '''
    Colored grid-world MDP: each cell carries a color looked up from
    @col_sq_locs_dict.

    Args:
        col_sq_locs_dict (dict):
            Key: int (x coordinate)
            Val: dict
                Key: int (y coordinate)
                Val: color
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple): Initial (x, y) location.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
    '''
    # Fix: avoid a mutable default argument shared across instances.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs

    GridWorldMDP.__init__(self, width, height, init_loc=init_loc,
                          goal_locs=goal_locs)
    # Rebuild the initial state so it carries the color of the start cell.
    self.init_state = ColoredGridWorldState(
        init_loc[0],
        init_loc[1],
        col_sq_locs_dict[init_loc[0]][init_loc[1]])
    self.col_sq_locs_dict = col_sq_locs_dict
def __init__(self, width=5, height=3, init_loc=(1, 1), rand_init=False,
             goal_locs=None, lava_locs=None, walls=None, safe_locs=None,
             is_goal_terminal=True, gamma=0.99, slip_prob=0.0,
             step_cost=0.0, lava_cost=1, name="gridworld", gui=False):
    '''
    Grid-world MDP with unsafe (lava) cells, a jump action, and an
    optional pygame GUI.

    Args:
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to sample the initial location randomly.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
        lava_locs (list of tuples): Lava locations. Defaults to [()].
        walls (list of tuples): Wall locations. Defaults to [].
        safe_locs (list of tuples): (x, y) locations considered safe.
            Defaults to [].
        is_goal_terminal (bool): Whether goals are terminal.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        lava_cost (float): Cost of stepping onto lava.
        name (str): MDP name.
        gui (bool): Whether to open a pygame window and draw the state.
    '''
    # Fix: replace mutable default arguments with None sentinels.
    goal_locs = [(5, 3)] if goal_locs is None else goal_locs
    lava_locs = [()] if lava_locs is None else lava_locs
    walls = [] if walls is None else walls
    safe_locs = [] if safe_locs is None else safe_locs

    GridWorldMDP.__init__(self, width, height, init_loc, rand_init,
                          goal_locs, lava_locs, walls, is_goal_terminal,
                          gamma, slip_prob, step_cost, lava_cost, name)
    self.jump_dist = 2
    self.actions = UnsafeGridWorldMDP.ACTIONS

    # Collect the states whose (x, y) location is flagged safe.
    self.safe_states = set()
    for state in self.get_all_states():
        if (state.x, state.y) in safe_locs:
            self.safe_states.add(state)

    self.gui = gui
    if gui:
        # Pygame setup.
        # Fix: initialize pygame before creating the display surface;
        # the original called set_mode() first and init() after.
        pygame.init()
        # NOTE(review): both dimensions use SCREEN_HEIGHT (square
        # window) — preserved from the original; confirm intentional.
        self.screen = pygame.display.set_mode(
            (SCREEN_HEIGHT, SCREEN_HEIGHT))
        self.agent_shape = None
        self.screen.fill((255, 255, 255))
        pygame.display.update()
        self.agent_shape = self._draw_state(self.init_state,
                                            draw_statics=True)
def __init__(self, col_sq_locs_dict, width=5, height=3, init_loc=(1, 1),
             goal_locs=None):
    '''
    Colored grid-world MDP whose initial state includes the color of the
    starting cell.

    Args:
        col_sq_locs_dict (dict):
            Key: int (x coordinate)
            Val: dict
                Key: int (y coordinate)
                Val: color
        width (int): Grid width in cells.
        height (int): Grid height in cells.
        init_loc (tuple): Initial (x, y) location.
        goal_locs (list of tuples): Goal locations. Defaults to [(5, 3)].
    '''
    # Fix: avoid a mutable default argument shared across instances.
    if goal_locs is None:
        goal_locs = [(5, 3)]

    GridWorldMDP.__init__(self, width, height, init_loc=init_loc,
                          goal_locs=goal_locs)
    # Replace the plain initial state with one carrying the start color.
    start_color = col_sq_locs_dict[init_loc[0]][init_loc[1]]
    self.init_state = ColoredGridWorldState(init_loc[0], init_loc[1],
                                            start_color)
    self.col_sq_locs_dict = col_sq_locs_dict
def __init__(self,
             width=30,
             height=30,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             cell_distribution="probability",
             cell_type_probs=None,
             cell_type_forced_locations=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_rewards=None,
             is_goal_terminal=True,
             traj_init_cell_types=None,
             goal_colors=None,
             init_loc=(1, 1),
             rand_init=True,
             init_state=None,
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Height of navigation grid in no. of cells.
        width (int): Width of navigation grid in no. of cells.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        cell_distribution (str):
            "probability" - assign cells according to @cell_type_probs
            over the state space.
            "manual" - use @cell_type_forced_locations to assign cells
            to locations.
        cell_type_probs (list of floats): Only applicable when
            @cell_distribution is "probability". Probability of each
            @cell_types entry; values must sum to 1. Note: actual
            probabilities will be slightly off because this doesn't
            factor in the number of goals. Defaults to
            [0.68, 0.17, 0.05, 0.05, 0.05].
        cell_type_forced_locations (list of [(x, y), ...] or np.inf):
            Only applicable when @cell_distribution is "manual". np.inf
            entries are sampled uniformly at random.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_rewards (list of float): Reward per goal. Defaults to [1.0].
        is_goal_terminal (bool): Whether goals are terminal.
        traj_init_cell_types (list of int): Navigable cell types, used
            to sample init states when generating trajectories.
            Defaults to [0].
        goal_colors (list of str/int): Color of each goal in @goal_locs.
            Distinct colors get unique features; identical colors map to
            the same feature. Defaults to ["blue"].
        init_loc (tuple: (int, int)): (x, y) initial location.
        rand_init (bool): Whether to use a random initial location.
        init_state (GridWorldState): Initial state.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels so
    # instances never share default lists.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]
    if cell_type_probs is None:
        # Default is chosen arbitrarily larger than the percolation
        # threshold for a square lattice, which is just an approximation
        # to match the cell distribution with that of the paper.
        cell_type_probs = [0.68, 0.17, 0.05, 0.05, 0.05]
    if cell_type_forced_locations is None:
        # NOTE(review): the last entry [4, 4] is a bare list rather than
        # a list of tuples like the others — preserved as-is; confirm
        # whether [(4, 4)] was intended.
        cell_type_forced_locations = [np.inf, np.inf,
                                      [(1, 1), (5, 5)], [(2, 2)], [4, 4]]
    if goal_rewards is None:
        goal_rewards = [1.0]
    if traj_init_cell_types is None:
        traj_init_cell_types = [0]
    if goal_colors is None:
        goal_colors = ["blue"]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    assert len(goal_colors) == len(goal_locs) == len(goal_rewards)
    assert len(cell_types) == len(cell_type_rewards)
    assert cell_distribution == "manual" \
        or len(cell_types) == len(cell_type_probs)
    assert cell_distribution == "probability" \
        or len(cell_types) == len(cell_type_forced_locations)

    self.value_iter = None
    self._policy_invalidated = True
    self.cell_types = cell_types

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Cell types: 2d grid where each element holds a cell-type id.
    self.cells = self.__generate_cell_type_grid(
        height, width, cell_distribution, cell_type_probs,
        cell_type_forced_locations)
    # Preserve a copy without goals.
    self.cells_wo_goals = self.cells.copy()

    # Cell rewards: reward of each grid cell, looked up via its type.
    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[self.cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    # Preserve a copy without goals.
    self.cell_rewards_wo_goals = self.cell_rewards.copy()

    # Update cells and cell_rewards with goals and their rewards.
    self.reset_goals(goal_locs, goal_rewards, goal_colors)

    # Find set of empty/navigable cells for sampling trajectory init
    # states.
    self.set_traj_init_cell_types(cell_types=traj_init_cell_types)

    # Additional book-keeping.
    self.feature_cell_dist = None
    self.feature_cell_dist_normalized = None
def __init__(self,
             width=30,
             height=30,
             init_loc=(1, 1),
             rand_init=True,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             additional_obstacles=None,  # for additional experimentation only
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_reward=1.0,
             is_goal_terminal=True,
             init_state=None,
             vacancy_prob=0.8,
             sample_cell_types=None,
             use_goal_dist_feature=True,
             goal_color="blue",
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Grid height in cells.
        width (int): Grid width in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to use a random initial location.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        additional_obstacles (dict): Maps a cell-type id to a list of
            (x, y) locations forced to that type. When non-empty, the
            random grid starts all-empty. Defaults to {}.
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_reward (float): Reward assigned to each goal cell.
        is_goal_terminal (bool): Whether goals are terminal.
        init_state (GridWorldState): Initial state.
        vacancy_prob (float): Controls the fraction of navigable cells.
        sample_cell_types (list of int): Cell types from which initial
            states may be sampled. Defaults to [0].
        use_goal_dist_feature (bool): Whether to include a
            goal-distance feature.
        goal_color (str): Color used to render goals.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]
    if additional_obstacles is None:
        additional_obstacles = {}
    if sample_cell_types is None:
        sample_cell_types = [0]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    self.cell_types = cell_types

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Probability of each cell type.
    if len(additional_obstacles) > 0:
        # Manual obstacles: start from an all-empty random grid.
        self.cell_prob = np.zeros(len(self.cell_types))
        self.cell_prob[0] = 1.
    else:
        # Can't say more about these numbers (chosen arbitrarily larger
        # than the percolation threshold for a square lattice). This is
        # just an approximation (to match the cell distribution with
        # that of the paper); it is not the primary concern here.
        self.cell_prob = [8. * vacancy_prob / 10.,
                          2 * vacancy_prob / 10.] \
                         + [(1 - vacancy_prob) / 3.] * 3

    # Matrix for identifying cell type and associated reward.
    self.cells = np.random.choice(len(self.cell_types),
                                  p=self.cell_prob,
                                  size=height * width).reshape(height,
                                                               width)

    # Overwrite the sampled grid with any forced obstacle locations.
    self.additional_obstacles = additional_obstacles
    for obs_type, obs_locs in self.additional_obstacles.items():
        for obs_loc in obs_locs:
            row, col = self._xy_to_rowcol(obs_loc[0], obs_loc[1])
            self.cells[row, col] = obs_type

    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    self.goal_reward = goal_reward

    # Set goals and their rewards in the matrix.
    for g in goal_locs:
        g_r, g_c = self._xy_to_rowcol(g[0], g[1])
        # Allocate the next (unused) cell-type id to the goal.
        self.cells[g_r, g_c] = len(self.cell_types)
        self.cell_rewards[g_r, g_c] = self.goal_reward

    self.goal_locs = goal_locs
    self.use_goal_dist_feature = use_goal_dist_feature
    self.goal_color = goal_color
    self.feature_cell_dist = None
    self.feature_cell_dist_normalized = None
    self.value_iter = None
    self.define_sample_cells(cell_types=sample_cell_types)
def __init__(self,
             width=30,
             height=30,
             init_loc=(1, 1),
             rand_init=True,
             goal_locs=None,
             cell_types=None,
             cell_type_rewards=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             goal_reward=1.0,
             is_goal_terminal=True,
             init_state=None,
             name="Navigation MDP"):
    """
    Note:
        1. Locations and state dimensions start from 1 instead of 0.
        2. 2d locations are interpreted in (x, y) format.

    Args:
        height (int): Grid height in cells.
        width (int): Grid width in cells.
        init_loc (tuple: (int, int)): Initial (x, y) location.
        rand_init (bool): Whether to use a random initial location.
        goal_locs (list of tuples: [(int, int)...]): Goal locations.
            Defaults to [(21, 21)].
        cell_types (list of str): Non-goal cell types. Defaults to
            ["empty", "yellow", "red", "green", "purple"].
        cell_type_rewards (list of int): Reward for each @cell_types
            entry. Defaults to [0, 0, -10, -10, -10].
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        goal_reward (float): Reward assigned to each goal cell.
        is_goal_terminal (bool): Whether goals are terminal.
        init_state (GridWorldState): Initial state.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if goal_locs is None:
        goal_locs = [(21, 21)]
    if cell_types is None:
        cell_types = ["empty", "yellow", "red", "green", "purple"]
    if cell_type_rewards is None:
        cell_type_rewards = [0, 0, -10, -10, -10]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    self.cell_types = cell_types

    # Probability of each cell type.
    vacancy_prob = 0.8
    # Can't say more about these numbers (chosen arbitrarily larger than
    # the percolation threshold for a square lattice). This is just an
    # approximation; the paper isn't concerned about cell probabilities
    # and doesn't mention them.
    self.cell_prob = [4. * vacancy_prob / 5., vacancy_prob / 5.] \
                     + [(1 - vacancy_prob) / 3.] * 3

    # Matrix for identifying cell type and associated reward.
    self.cells = np.random.choice(len(self.cell_types),
                                  p=self.cell_prob,
                                  size=height * width).reshape(height,
                                                               width)
    self.cell_type_rewards = cell_type_rewards
    self.cell_rewards = np.asarray(
        [[cell_type_rewards[item] for item in row]
         for row in self.cells]).reshape(height, width)
    self.goal_reward = goal_reward

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=init_loc,
                          rand_init=rand_init,
                          goal_locs=goal_locs,
                          lava_locs=[()],
                          walls=[],  # no walls in this mdp
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=init_state,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Set goals and their rewards in the matrix.
    for g in goal_locs:
        g_r, g_c = self._xy_to_rowcol(g[0], g[1])
        # Allocate the next (unused) cell-type id to the goal.
        self.cells[g_r, g_c] = len(self.cell_types)
        self.cell_rewards[g_r, g_c] = self.goal_reward
    self.goal_locs = goal_locs
    self.feature_cell_dist = None
def __init__(self,
             width=30,
             height=30,
             living_cell_types=None,
             living_cell_rewards=None,
             living_cell_distribution="probability",
             living_cell_type_probs=None,
             living_cell_locs=None,
             goal_cell_locs=None,
             goal_cell_rewards=None,
             goal_cell_types=None,
             gamma=0.99,
             slip_prob=0.00,
             step_cost=0.0,
             is_goal_terminal=True,
             traj_init_cell_types=None,
             planning_init_loc=(1, 1),
             planning_rand_init=True,
             name="Navigation MDP"):
    """
    Note:
        Locations are specified in (x, y) format, but the (row, col)
        convention is used while storing in memory.

    Args:
        height (int): Height of navigation grid in no. of cells.
        width (int): Width of navigation grid in no. of cells.
        living_cell_types (list of str): Non-goal cell types. Defaults
            to ["empty", "yellow", "red", "green", "purple"].
        living_cell_rewards (list of int): Reward for each @cell_type.
            Defaults to [0, 0, -10, -10, -10].
        living_cell_distribution (str):
            "probability" - assign cells according to
            @living_cell_type_probs.
            "manual" - use @living_cell_locs to assign cells to the
            state space.
        living_cell_type_probs (list of floats): Probability of each
            @living_cell_types entry. Note: goals aren't factored in, so
            actual probabilities can be off. Defaults to
            [0.68, 0.17, 0.05, 0.05, 0.05], chosen arbitrarily larger
            than the percolation threshold for a square lattice — an
            approximation to match the paper's cell distribution.
        living_cell_locs (list of [(x, y), ...] or np.inf): Living cell
            locations; np.inf entries are sampled uniformly at random.
        goal_cell_locs (list of tuples: [(int, int)...]): Goal
            locations. Defaults to [].
        goal_cell_rewards (list of int): Goal rewards. Defaults to [].
        goal_cell_types (list of str/int): Type of each goal in
            @goal_cell_locs. Defaults to [].
        gamma (float): Discount factor.
        slip_prob (float): Probability of slipping.
        step_cost (float): Cost per step.
        is_goal_terminal (bool): Whether goals are terminal.
        traj_init_cell_types (list of int): Trajectory init state
            sampling cell types. Defaults to [0].
        planning_init_loc (tuple: (int, int)): Initial (x, y) location
            used for planning.
        planning_rand_init (bool): Whether planning uses random initial
            locations.
        name (str): MDP name.
    """
    # Fix: replace mutable default arguments with None sentinels.
    if living_cell_types is None:
        living_cell_types = ["empty", "yellow", "red", "green", "purple"]
    if living_cell_rewards is None:
        living_cell_rewards = [0, 0, -10, -10, -10]
    if living_cell_type_probs is None:
        living_cell_type_probs = [0.68, 0.17, 0.05, 0.05, 0.05]
    if living_cell_locs is None:
        # NOTE(review): the last entry [4, 4] is a bare list rather than
        # a list of tuples like the others — preserved as-is; confirm
        # whether [(4, 4)] was intended.
        living_cell_locs = [np.inf, np.inf,
                            [(1, 1), (5, 5)], [(2, 2)], [4, 4]]
    if goal_cell_locs is None:
        goal_cell_locs = []
    if goal_cell_rewards is None:
        goal_cell_rewards = []
    if goal_cell_types is None:
        goal_cell_types = []
    if traj_init_cell_types is None:
        traj_init_cell_types = [0]

    # Fix: typo "widht" in the original assertion message.
    assert height > 0 and isinstance(height, int) and width > 0 \
        and isinstance(width, int), \
        "height and width must be integers and > 0"
    assert len(living_cell_types) == len(living_cell_rewards)
    assert living_cell_distribution == "manual" \
        or len(living_cell_types) == len(living_cell_type_probs)
    assert living_cell_distribution == "probability" \
        or len(living_cell_types) == len(living_cell_locs)
    assert len(goal_cell_types) == len(goal_cell_locs) \
        == len(goal_cell_rewards)

    GridWorldMDP.__init__(self,
                          width=width,
                          height=height,
                          init_loc=planning_init_loc,
                          rand_init=planning_rand_init,
                          goal_locs=goal_cell_locs,
                          lava_locs=[()],
                          walls=[],
                          is_goal_terminal=is_goal_terminal,
                          gamma=gamma,
                          init_state=None,
                          slip_prob=slip_prob,
                          step_cost=step_cost,
                          name=name)

    # Living (navigation) cell types (str) and ids.
    self.living_cell_types = living_cell_types
    self.living_cell_ids = list(range(len(living_cell_types)))

    # State space: 2d grid where each element holds a cell id.
    self.state_space = self.__generate_state_space(
        height, width, living_cell_distribution,
        living_cell_type_probs, living_cell_locs)
    # Preserve a copy without goals.
    self.state_space_wo_goals = self.state_space.copy()

    # Rewards: per-cell reward looked up via the cell's type id.
    self.living_cell_rewards = living_cell_rewards
    self.state_rewards = np.asarray(
        [[self.living_cell_rewards[item] for item in row]
         for row in self.state_space]).reshape(height, width)
    # Preserve a copy without goals.
    self.state_rewards_wo_goals = self.state_rewards.copy()

    # Update cells and cell_rewards with goals and their rewards.
    self.reset_goals(goal_cell_locs, goal_cell_rewards, goal_cell_types)

    # Find set of empty/navigable cells for sampling trajectory init
    # states.
    self.set_traj_init_cell_types(cell_types=traj_init_cell_types)

    # Run value iteration.
    self.value_iter = ValueIteration(self, sample_rate=1)

    # Additional book-keeping.
    self.feature_cell_dist = None
    self.feature_cell_dist_kind = 0