Example #1
    def __init__(self,
                 starting_room=1,
                 gamma=0.99,
                 slip_prob=0.0,
                 env_file=[],
                 constraints={},
                 ap_maps={}):
        # TODO: work
        initial_state = CubeL1State(starting_room, 0)
        #self.goal_state = CubeL1State(goal_room,0, is_terminal=True)
        #self.terminal_func = lambda state: state._is_terminal# == 1
        self.constraints = constraints
        self.ap_maps = ap_maps
        self.slip_prob = slip_prob

        if len(env_file) != 0:
            self.cube_env = env_file[0]
            CubeL1MDP.ACTIONS = self.cube_env['L1ACTIONS']
        else:
            print("Input: env_file")

        MDP.__init__(self,
                     CubeL1MDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=initial_state,
                     gamma=gamma)
Example #2
    def __init__(self,
                 graph,
                 goals={},
                 start_state={},
                 is_goal_terminal=True,
                 gamma=0.99,
                 init_state=None,
                 slip_prob=0.0,
                 step_cost=0.0,
                 name="gridworld"):
        '''
        Args:
            graph (dict): its keys are passed to MDP.__init__ as the action set.
            goals (dict): goal specification handed to each GraphWorldState.
            start_state: initial graph node, wrapped in a GraphWorldState.
            is_goal_terminal (bool)
            gamma (float)
            slip_prob (float)
            step_cost (float)
            name (str)
        '''

        # Setup init location.
        self.graph = graph
        self.goals = goals

        MDP.__init__(self,
                     self.graph.keys(),
                     self._transition_func,
                     self._reward_func,
                     init_state=GraphWorldState(state=0, goals=goals),
                     gamma=gamma)

        self.step_cost = step_cost
        self.cur_state = GraphWorldState(start_state, goals)
        self.is_goal_terminal = is_goal_terminal
        self.name = name
Example #3
 def __init__(self,
              width=8,
              height=7,
              init_loc=(1, 1),
              rocks=None,
              gamma=0.99,
              slip_prob=0.00,
              rock_rewards=[0.1, 1, 20],
              name="rocksample"):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         rocks (list of [x (int), y (int), is_good (bool)] triples)
         rock_rewards (list of floats)
     '''
     if rocks is None:
         rocks = [[1, 2, True], [5, 4, True], [6, 7, True], [1, 3, True],
                  [4, 5, True], [2, 7, False], [2, 2, True], [7, 4, False]]
     self.init_loc = init_loc
     self.init_rocks = rocks
     self.rock_rewards = rock_rewards
     self.name = name + "-" + str(len(rocks))
     self.width = width
     self.height = height
     MDP.__init__(self,
                  RockSampleMDP.ACTIONS,
                  self._transition_func,
                  self._reward_func,
                  init_state=self.get_init_state(),
                  gamma=gamma)
Example #4
 def __init__(self,
              width=8,
              height=7,
              init_loc=(1, 1),
              rocks=[[1, 2, True], [3, 1, False], [4, 2, True], [3, 5, False],
                     [4, 5, True], [2, 7, False], [6, 6, True], [7, 4, False]],
              gamma=0.99,
              slip_prob=0.00,
              rock_reward=10,
              name="rocksample"):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         rocks (list of [x (int), y (int), is_good (bool)] triples)
         rock_reward (float)
     '''
     self.init_loc = init_loc
     self.rocks = rocks
     self.rock_reward = rock_reward
     self._update_actions()
     self.name = name + "-" + str(self.rock_reward)
     self.width = width
     self.height = height
     MDP.__init__(self,
                  RockSampleMDP.ACTIONS,
                  self._transition_func,
                  self._reward_func,
                  init_state=self.get_init_state(),
                  gamma=gamma)
Example #5
    def __init__(self,
                 init_mean=(-0.2, -0.2),
                 control_cost=False,
                 dense_reward=False,
                 render=False):
        xml = os.path.join(
            os.path.expanduser("~"),
            "git-repos/dm_control/dm_control/suite/point_mass.xml")
        model = load_model_from_path(xml)
        self.sim = MjSim(model)
        self.render = render
        self.init_mean = init_mean
        self.control_cost = control_cost
        self.dense_reward = dense_reward

        if self.render: self.viewer = MjViewer(self.sim)

        # Config
        self.env_name = "Point-Mass-Environment"
        self.target_position = np.array([0., 0.])
        self.target_tolerance = 0.02
        self.init_noise = 0.05

        self._initialize_mujoco_state()
        self.init_state = self.get_state()

        print("Loaded {} with dense_reward={}".format(self.env_name,
                                                      self.dense_reward))

        MDP.__init__(self, [0, 1], self._transition_func, self._reward_func,
                     self.init_state)
Example #6
    def __init__(self,
                 starting_room=1,
                 gamma=0.99,
                 slip_prob=0.0,
                 env_file=[],
                 constraints={},
                 ap_maps={},
                 automata=[],
                 init_state=[]):
        # TODO: work
        #self.terminal_func = lambda state: state._is_terminal
        self.constraints = constraints
        self.ap_maps = ap_maps
        self.slip_prob = slip_prob
        self.automata = automata

        if len(env_file) != 0:
            self.cube_env = env_file[0]
            CubeL1MDP.ACTIONS = self.cube_env['L1ACTIONS']
        else:
            print("Input: env_file")

#        initial_state = CubeL1State(starting_room, self._transition_q(starting_room, ""))
#        if initial_state.q != 0:
#            initial_state.set_terminal(True)

        MDP.__init__(self,
                     CubeL1MDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)
Example #7
    def __init__(self,
                gravity=9.8,
                masscart=1.0,
                masspole=0.1,
                length=.5,
                gamma=0.99,
                tau=.02,
                init_state_params=None,
                name="Cart-Pendulum"):

        if init_state_params is None:
            init_state = CartPoleState(x=0, x_dot=0, theta=0, theta_dot=0)
        else:
            init_state = CartPoleState(x=init_state_params["x"], x_dot=init_state_params["x_dot"],\
                                        theta=init_state_params["theta"], theta_dot=init_state_params["theta_dot"])

        MDP.__init__(self, CartPoleMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)

        #from parameters
        self.gravity = gravity
        self.masscart = masscart
        self.masspole = masspole
        self.length = length
        self.gamma = gamma
        self.tau = tau
        self.name = name

        #thresholds
        self.x_threshold = 2.4 #abs val of limit of x position of cart
        self.theta_threshold = self._degrees_to_radians(20) #angle away from vertical before being considered terminal

        #computed
        self.total_mass = (self.masscart + self.masspole)
        self.polemass_length = (self.masspole * self.length)
Example #8
    def __init__(self,
                 starting_floor=1,
                 gamma=0.99,
                 env_file=[],
                 constraints={},
                 ap_maps={}):
        self.terminal_func = lambda state: state.q != 0
        self.constraints = constraints
        self.ap_maps = ap_maps

        if len(env_file) != 0:
            self.cube_env = env_file[0]
            CubeL2MDP.ACTIONS = self.cube_env['L2ACTIONS']
        else:
            print("Input: env_file")

        initial_state = CubeL2State(starting_floor,
                                    self._transition_q(starting_floor, ""))
        if initial_state.q != 0:
            initial_state.set_terminal(True)

        MDP.__init__(self,
                     CubeL2MDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=initial_state,
                     gamma=gamma)
Example #9
    def __init__(self, task, init_loc=(0, 0), blocks=[], rooms=[], doors=[], rand_init=False, gamma=0.99,
                 init_state=None):
        '''
        :param task: The given CleanUpTask for this MDP
        :param init_loc: Initial agent location
        :param blocks: List of blocks
        :param rooms: List of rooms
        :param doors: List of doors
        :param rand_init: random initialization boolean
        :param gamma: gamma factor
        :param init_state: Initial state if given
        '''
        from simple_rl.tasks.cleanup.cleanup_state import CleanUpState
        self.task = task
        if rand_init:
            block_loc = [(block.x, block.y) for block in blocks]
            init_loc = random.choice(
                [(x, y) for room in rooms for (x, y) in room.points_in_room if (x, y) not in block_loc])
        init_state = CleanUpState(task, init_loc[0], init_loc[1], blocks=blocks, doors=doors, rooms=rooms) \
            if init_state is None or rand_init else init_state
        self.cur_state = init_state
        MDP.__init__(self, self.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)

        # The following lines are used for efficiency
        legal_states = [(x, y) for room in rooms for x, y in room.points_in_room]
        legal_states.extend([(door.x, door.y) for door in doors])
        self.legal_states = set(legal_states)
        self.door_locs = set([(door.x, door.y) for door in doors])
        self.width = max(self.legal_states, key=lambda tup: tup[0])[0] + 1
        self.height = max(self.legal_states, key=lambda tup: tup[1])[1] + 1
Example #10
    def __init__(self,
                 num_states=5,
                 num_rand_trans=5,
                 num_actions=3,
                 gamma=0.99):
        '''
        Args:
            num_states (int) [optional]: Number of states in the Random MDP.
            num_rand_trans (int) [optional]: Number of possible next states.

        Summary:
            Each state-action pair picks @num_rand_trans possible states and has a uniform distribution
            over them for transitions. Rewards are also chosen randomly.
        '''
        RandomMDP.ACTIONS = [str(i) for i in range(num_actions)]
        MDP.__init__(self,
                     RandomMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=RandomState(1),
                     gamma=gamma)
        # assert(num_rand_trans <= num_states)
        self.num_rand_trans = num_rand_trans
        self.num_states = num_states
        self._reward_s_a = (random.choice(range(self.num_states)),
                            random.choice(RandomMDP.ACTIONS))
        self._transitions = defaultdict(lambda: defaultdict(str))
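Per the docstring, each state-action pair transitions uniformly over @num_rand_trans randomly chosen next states. A short interaction sketch; the import path and the execute_agent_action interface are assumptions about simple_rl:

# Hedged sketch; import path and execute_agent_action signature are assumptions.
from simple_rl.tasks import RandomMDP

mdp = RandomMDP(num_states=10, num_rand_trans=3, num_actions=4)
action = mdp.get_actions()[0]
reward, next_state = mdp.execute_agent_action(action)  # one uniform-random transition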
Example #11
    def __init__(self,
                 combo,
                 num_actions=3,
                 num_states=None,
                 reset_val=0.01,
                 gamma=0.99):
        '''
        Args:
            num_states (int) [optional]: Number of states in the chain.
        '''
        ComboLockMDP.ACTIONS = [str(i) for i in range(1, num_actions + 1)]
        self.num_states = len(combo) if num_states is None else num_states
        self.num_actions = num_actions
        self.combo = combo

        if len(combo) != self.num_states:
            raise ValueError("(simple_rl.ComboLockMDP Error): Combo length (" +
                             str(len(combo)) +
                             ") must be the same as num_states (" +
                             str(self.num_states) + ").")
        elif max(combo) > num_actions:
            raise ValueError(
                "(simple_rl.ComboLockMDP Error): Combo (" + str(combo) +
                ") must only contain values less than or equal to @num_actions ("
                + str(num_actions) + ").")

        MDP.__init__(self,
                     ComboLockMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=ChainState(1),
                     gamma=gamma)
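The constructor above validates that the combo length equals num_states and that every digit is at most num_actions. A sketch exercising both the valid and invalid cases (the import path is an assumption):

from simple_rl.tasks import ComboLockMDP  # assumed import path

lock = ComboLockMDP(combo=[1, 3, 2], num_actions=3)   # valid: length 3, digits <= 3
try:
    ComboLockMDP(combo=[1, 4, 2], num_actions=3)      # digit 4 > num_actions -> ValueError
except ValueError as err:
    print(err)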
Example #12
    def __init__(self, starting_room=1, goal_room=4, gamma=0.99):
        initial_state = FourRoomL1State(starting_room)
        self.goal_state = FourRoomL1State(goal_room, is_terminal=True)
        self.terminal_func = lambda state: state == self.goal_state

        MDP.__init__(self, FourRoomL1MDP.ACTIONS, self._transition_func, self._reward_func, init_state=initial_state,
                     gamma=gamma)
Example #13
    def __init__(self,
                 torque_multiplier=50.,
                 init_mean=(-0.2, -0.2),
                 render=False):
        dir_path = os.path.dirname(os.path.realpath(__file__))
        path = os.path.join(dir_path, 'asset/point_mass.xml')
        model = load_model_from_path(path)
        self.sim = MjSim(model)
        self.render = render
        self.init_mean = init_mean

        self.viewer = MjViewer(self.sim)

        # Config
        self.env_name = "Point-Mass-Environment"
        self.target_position = np.array([0., 0.])
        self.target_tolerance = 0.01
        self.init_noise = 0.05
        self.max_absolute_torque = 5.
        self.torque_multiplier = torque_multiplier

        self._initialize_mujoco_state()
        self.init_state = self.get_state()

        MDP.__init__(self, [0, 1], self._transition_func, self._reward_func,
                     self.init_state)
Example #14
    def __init__(self, width, height,
                tee_loc, hole_loc, rough_locs, hazard_locs, wind_dir, wind_spd,
                gamma, step_cost, name):
        
        '''
        In rough, all driver shots converted to iron and failure chance exists
        In hazard, only putt is allowed and greater failure chance exists
        wind_dir is either up, down, left, right
        wind_spd shifts driver shots by .2*speed and iron shots by .1*speed on average
        '''
        self.width = width
        self.height = height
        self.tee_loc = tee_loc
        self.hole_loc = hole_loc
        self.rough_locs = rough_locs
        self.hazard_locs = hazard_locs
        self.wind_dir = wind_dir
        self.wind_spd = wind_spd
        self.gamma = gamma
        self.step_cost = step_cost
        self.name = name

        self.new_x = 0
        self.new_y = 0

        self.init_state = GolfWorldState(self.tee_loc[0], self.tee_loc[1])
        
        MDP.__init__(self, GolfWorldMDP.ACTIONS, self._transition_func, self._reward_func, self.init_state, self.gamma, self.step_cost)
Example #15
    def __init__(self,
                 noise=0.,
                 episode_length=1000,
                 reward_scale=1000.,
                 cfg="pinball_empty.cfg",
                 render=False):
        # default_config_dir = os.path.join(__rlpy_location__, "Domains", "PinballConfigs")
        default_config_dir = os.path.dirname(__file__)
        self.cfg = cfg
        self.domain = Pinball(noise=noise,
                              episodeCap=episode_length,
                              configuration=os.path.join(
                                  default_config_dir, "PinballConfigs",
                                  self.cfg))
        self.render = render
        self.reward_scale = reward_scale

        # Each observation from domain.step(action) is a tuple of the form reward, next_state, is_term, possible_actions
        # s0 returns initial state, is_terminal, possible_actions
        init_observation = self.domain.s0()
        init_state = tuple(init_observation[0])

        actions = self.domain.actions

        MDP.__init__(self,
                     actions,
                     self._transition_func,
                     self._reward_func,
                     init_state=PinballState(*init_state))
Example #16
    def __init__(self, seed, dense_reward=False, render=False, color_str=""):
        self.env_name = "point_maze"
        self.seed = seed
        self.dense_reward = dense_reward
        self.render = render

        # Set random seed
        random.seed(seed)
        np.random.seed(seed)

        # Configure env
        gym_mujoco_kwargs = {
            'maze_id': 'Maze',
            'n_bins': 0,
            'observe_blocks': False,
            'put_spin_near_agent': False,
            'top_down_view': False,
            'manual_collision': True,
            'maze_size_scaling': 4,
            'color_str': color_str
        }
        self.env = PointMazeEnv(**gym_mujoco_kwargs)
        self.goal_position = self.env.goal_xy
        self.reset()

        MDP.__init__(self, [1, 2], self._transition_func, self._reward_func,
                     self.init_state)
Example #17
    def __init__(self,
                 gravity=9.8,
                 masscart=1.0,
                 masspole=0.1,
                 length=.5,
                 gamma=0.99,
                 tau=.02,
                 name="Cart-Pendulum"):

        init_state = CartPoleState(x=0, x_dot=0, theta=0, theta_dot=0)

        MDP.__init__(self,
                     CartPoleMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)

        #from parameters
        self.gravity = gravity
        self.masscart = masscart
        self.masspole = masspole
        self.length = length
        self.gamma = gamma
        self.tau = tau
        self.name = name

        #thresholds
        self.x_threshold = 2.4  #abs val of limit of x position of cart
        self.theta_threshold = self._degrees_to_radians(
            20)  #angle away from vertical before being considered terminal

        #computed
        self.total_mass = (self.masscart + self.masspole)
        self.polemass_length = (self.masspole * self.length)
Example #18
    def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
        '''
        Args:
            num_pegs (int)
            num_discs (int)
            gamma (float)
        '''
        self.num_pegs = num_pegs
        self.num_discs = num_discs
        HanoiMDP.ACTIONS = [
            str(x) + str(y) for x, y in itertools.product(
                range(self.num_pegs), range(self.num_pegs)) if x != y
        ]

        # Setup init state.
        init_state = [" " for peg in range(num_pegs)]
        x = ""
        for i in range(num_discs):
            x += chr(97 + i)
        init_state[0] = x
        init_state = State(data=init_state)

        MDP.__init__(self,
                     HanoiMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)
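The actions here are peg-pair strings ("01", "02", ...), so the MDP is small enough to plan on directly. A hedged planning sketch; the ValueIteration import path and its run_vi/policy methods are assumptions about simple_rl:

from simple_rl.tasks import HanoiMDP            # assumed import path
from simple_rl.planning import ValueIteration   # assumed import path

mdp = HanoiMDP(num_pegs=3, num_discs=3)
planner = ValueIteration(mdp)
planner.run_vi()                                 # solve the 3-disc instance
print(planner.policy(mdp.get_init_state()))      # greedy action, e.g. a peg pair such as "02"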
Example #19
 def __init__(self, ns, r):
     self.nS = ns
     self.nA = 2
     self.states = range(self.nS)
     self.actions = range(self.nA)
     self.T = self.generate_transition_matrix()
     self.R = r
     MDP.__init__(self, self.actions, self._transition_func, self._reward_func, init_state=0)
Example #20
 def __init__(self, num_states=5, reset_val=0.01, gamma=0.99):
     '''
     Args:
         num_states (int) [optional]: Number of states in the chain.
     '''
     MDP.__init__(self, ChainMDP.ACTIONS, self._transition_func, self._reward_func, init_state=ChainState(1), gamma=gamma)
     self.num_states = num_states
     self.reset_val = reset_val
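A quick interaction sketch for the chain constructor above; the import path, execute_agent_action, and reset are assumptions about simple_rl's MDP interface:

from simple_rl.tasks import ChainMDP  # assumed import path

chain = ChainMDP(num_states=5)
for _ in range(3):
    reward, state = chain.execute_agent_action(chain.get_actions()[0])
    print(reward, state)
chain.reset()  # return to the initial chain state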
Example #21
 def __init__(self, env_name='CartPole-v0', render=False):
     '''
     Args:
         env_name (str)
     '''
     self.env_name = env_name
     self.env = gym.make(env_name)
     self.render = render
     MDP.__init__(self, range(self.env.action_space.n), self._transition_func, self._reward_func, init_state=GymState(self.env.reset()))
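The wrapper above exposes a Gym environment through the same MDP interface, with range(env.action_space.n) as the action set. A hedged sketch, assuming gym is installed and the simple_rl import path:

from simple_rl.tasks import GymMDP  # assumed import path

mdp = GymMDP(env_name='CartPole-v0', render=False)
print(mdp.get_actions())                          # range(env.action_space.n), per the constructor
reward, next_state = mdp.execute_agent_action(0)  # execute_agent_action interface assumed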
Example #22
    def __init__(self, starting_room=1, goal_room=4, starting_items=[], goal_items=[],
                 gamma=0.99, actions=None, doors=[], rooms=[]):
        initial_state = FourRoomL1State(starting_room, items=starting_items)
        self.goal_state = FourRoomL1State(goal_room, is_terminal=True, items=goal_items)
        self.terminal_func = lambda state: state == self.goal_state
        self.doors = doors
        self.rooms = rooms

        MDP.__init__(self, actions, self._transition_func, self._reward_func, init_state=initial_state,
                     gamma=gamma)
Example #23
 def __init__(self,
              actions,
              transition_func,
              reward_func,
              init_state,
              gamma=0.99,
              step_cost=0,
              str='OMDP'):
     MDP.__init__(self, actions, transition_func, reward_func, init_state,
                  gamma, step_cost)
     self.str = str
Example #24
 def __init__(self,
              actions,
              transition_func,
              reward_func,
              init_state,
              gamma=0.99):
     MDP.__init__(self,
                  actions,
                  transition_func,
                  reward_func,
                  init_state=init_state,
                  gamma=gamma)
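Examples #23, #24, and #28 are thin wrappers that forward everything to MDP.__init__. The same pattern is shown self-contained below as an illustrative sketch: the TwoStateMDP class is made up for demonstration, and the simple_rl.mdp import path plus the exact reward-function signature are assumptions.

from simple_rl.mdp import MDP, State  # assumed import path

class TwoStateMDP(MDP):
    '''Illustrative two-state MDP: "go" moves from state 0 to state 1 for +1 reward.'''
    ACTIONS = ["go", "stay"]

    def __init__(self, gamma=0.99):
        MDP.__init__(self, TwoStateMDP.ACTIONS, self._transition_func,
                     self._reward_func, init_state=State(data=0), gamma=gamma)

    def _transition_func(self, state, action):
        return State(data=1) if action == "go" else state

    # next_state is optional so this works whether the base class passes 2 or 3 arguments.
    def _reward_func(self, state, action, next_state=None):
        return 1.0 if action == "go" and state.data == 0 else 0.0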
Example #25
    def __init__(self,
                width=5,
                height=3,
                init_loc=(1, 1),
                rand_init=False,
                goal_locs=[()],
                lava_locs=[()],
                walls=[],
                is_goal_terminal=True,
                is_lava_terminal=False,
                gamma=0.99,
                slip_prob=0.0,
                step_cost=0.0,
                lava_cost=1.0,
                name="gridworld"):
        '''
        Args:
            height (int)
            width (int)
            init_loc (tuple: (int, int))
            goal_locs (list of tuples: [(int, int)...])
            lava_locs (list of tuples: [(int, int)...]): These locations return -1 reward.
            walls (list)
            is_goal_terminal (bool)
        '''

        # Setup init location.
        self.rand_init = rand_init
        if rand_init:
            init_loc = random.randint(1, width), random.randint(1, height)
            while init_loc in walls:
                init_loc = random.randint(1, width), random.randint(1, height)
        self.init_loc = init_loc
        init_state = GridWorldState(init_loc[0], init_loc[1])

        MDP.__init__(self, GridWorldMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)

        if type(goal_locs) is not list:
            raise ValueError("(simple_rl) GridWorld Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)].")
        self.step_cost = step_cost
        self.lava_cost = lava_cost
        self.walls = walls
        self.width = width
        self.height = height
        self.goal_locs = goal_locs
        self.cur_state = GridWorldState(init_loc[0], init_loc[1])
        self.is_goal_terminal = is_goal_terminal
        self.is_lava_terminal = is_lava_terminal
        self.slip_prob = slip_prob
        self.name = name
        self.lava_locs = lava_locs
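The grid-world constructor above is the one most commonly paired with simple_rl's agents; a usage sketch in the style of the library's README (import paths assumed):

from simple_rl.tasks import GridWorldMDP              # assumed import paths
from simple_rl.agents import QLearningAgent
from simple_rl.run_experiments import run_agents_on_mdp

mdp = GridWorldMDP(width=5, height=3, init_loc=(1, 1), goal_locs=[(5, 3)],
                   slip_prob=0.05, step_cost=0.0)
ql_agent = QLearningAgent(actions=mdp.get_actions())
run_agents_on_mdp([ql_agent], mdp, instances=5, episodes=100, steps=25)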
Example #26
 def __init__(self, num_arms=10, distr_family=np.random.normal, distr_params=None):
     '''
     Args:
         num_arms (int): Number of arms.
         distr_family (lambda): A function from numpy which, when given
             entities from @distr_params, samples from the distribution family.
         distr_params (dict): If None is given, default mu/sigma for normal
             distribution are initialized randomly.
     '''
     BanditMDP.ACTIONS = [str(i) for i in range(1, num_arms + 1)]
     MDP.__init__(self, BanditMDP.ACTIONS, self._transition_func, self._reward_func, init_state=State(1), gamma=1.0)
     self.num_arms = num_arms
     self.distr_family = distr_family
     self.distr_params = self.init_distr_params() if distr_params is None else distr_params    
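Since gamma is fixed to 1.0 and the actions are the arm indices "1"..num_arms, pulling an arm is a single execute_agent_action call (import path and interface assumed):

from simple_rl.tasks import BanditMDP  # assumed import path

bandit = BanditMDP(num_arms=5)
reward, _ = bandit.execute_agent_action("3")  # pull arm 3; reward sampled from distr_family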
Example #27
    def __init__(self, l0_domain):
        '''
        Args:
            l0_domain (CleanUpMDP)
        '''
        self.l0_domain = l0_domain

        state_mapper = AbstractCleanupL1StateMapper(l0_domain)
        l1_init_state = state_mapper.map_state(l0_domain.init_state)
        grounded_actions = CleanupL1MDP.ground_actions(l1_init_state)
        self.terminal_func = self._is_goal_state

        MDP.__init__(self, grounded_actions, self._transition_function,
                     self._reward_function, l1_init_state)
Example #28
 def __init__(self,
              actions,
              transition_func,
              reward_func,
              init_state,
              gamma=0.99,
              num_agents=2):
     MDP.__init__(self,
                  actions,
                  transition_func,
                  reward_func,
                  init_state=init_state,
                  gamma=gamma)
     self.num_agents = num_agents
Example #29
    def __init__(self, env_name='Reacher-v2', render=False):
        '''
        Args:
            env_name (str)
        '''
        self.env_name = env_name
        self.env = NormalizedEnv(gym.make(env_name))
        self.render = render

        MDP.__init__(self,
                     range(self.env.action_space.shape[0]),
                     self._transition_func,
                     self._reward_func,
                     init_state=FixedReacherState(self.env.reset()))
Example #30
 def __init__(self, env_name='CartPole-v0', render=False, render_every_n_episodes=0):
     '''
     Args:
         env_name (str)
         render (bool): If True, renders the screen every time step.
          render_every_n_episodes (int): @render must be True; then renders the screen every n episodes.
     '''
     # self.render_every_n_steps = render_every_n_steps
     self.render_every_n_episodes = render_every_n_episodes
     self.episode = 0
     self.env_name = env_name
     self.env = gym.make(env_name)
     self.render = render
     MDP.__init__(self, range(self.env.action_space.n), self._transition_func, self._reward_func, init_state=GymState(self.env.reset()))
Example #31
 def __init__(self, num_arms=10, distr_family=np.random.normal, distr_params=None):
     '''
     Args:
         num_arms (int): Number of arms.
         distr_family (lambda): A function from numpy which, when given
             entities from @distr_params, samples from the distribution family.
         distr_params (dict): If None is given, default mu/sigma for normal
             distribution are initialized randomly.
     '''
     BanditMDP.ACTIONS = [str(i) for i in range(1, num_arms + 1)]
     MDP.__init__(self, BanditMDP.ACTIONS, self._transition_func, self._reward_func, init_state=State(1), gamma=1.0)
     self.num_arms = num_arms
     self.distr_family = distr_family
     self.distr_params = self.init_distr_params() if distr_params is None else distr_params
Example #32
    def __init__(self,
                 width=5,
                 height=3,
                 init_loc=(1, 1),
                 goal_locs=[(5, 3)],
                 num_colors=3,
                 is_goal_terminal=True,
                 gamma=0.99,
                 init_state=None,
                 slip_prob=0.0,
                 name="color"):
        '''
        Args:
            height (int)
            width (int)
            init_loc (tuple: (int, int))
            goal_locs (list of tuples: [(int, int)...])
        '''

        ColorMDP.COLOR_MAP = range(num_colors)

        # Setup init location.
        self.init_loc = init_loc
        init_state = ColorState(
            init_loc[0], init_loc[1], ColorMDP.COLOR_MAP[0]
        ) if init_state is None else init_state

        MDP.__init__(self,
                     ColorMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)

        if type(goal_locs) is not list:
            print(
                "(simple_rl) color Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)]."
            )
            quit()

        self.width = width
        self.height = height
        self.walls = self._compute_walls()
        self.goal_locs = goal_locs
        self.cur_state = ColorState(init_loc[0], init_loc[1],
                                    ColorMDP.COLOR_MAP[0])
        self.is_goal_terminal = is_goal_terminal
        self.slip_prob = slip_prob
        self.name = name
Example #33
    def __init__(self, init_board=Board(False, 0), name="2048"):
        '''
        Initializer method that creates the MDP and assigns the other fields
        according to the 2048 game.
        '''
        self.step_cost = 0.0
        self.gamma = 0.99
        self.name = name
        self.current_state = TFEState((init_board.addTile()).addTile().board)

        # Initializing the actual Markov Decision Process:
        MDP.__init__(self, tfeMDP.ACTIONS, self._transition_func,
                     self._reward_func,
                     init_state=TFEState(
                         (init_board.addTile()).addTile().board),
                     gamma=self.gamma)
Example #34
    def __init__(self, pomdp):
        '''
        Convert given POMDP to a Belief State MDP
        Args:
            pomdp (POMDP)
        '''
        self.state_transition_func = pomdp.transition_func
        self.state_reward_func = pomdp.reward_func
        self.state_observation_func = pomdp.observation_func
        self.belief_updater_func = pomdp.belief_updater_func

        self.pomdp = pomdp

        MDP.__init__(self, pomdp.actions, self._belief_transition_function, self._belief_reward_function,
                     BeliefState(pomdp.init_belief), pomdp.gamma, pomdp.step_cost)
Example #35
    def __init__(self, combo, num_actions=3, num_states=None, reset_val=0.01, gamma=0.99):
        '''
        Args:
            num_states (int) [optional]: Number of states in the chain.
        '''
        ComboLockMDP.ACTIONS = [str(i) for i in range(1, num_actions + 1)]
        self.num_states = len(combo) if num_states is None else num_states
        self.num_actions = num_actions
        self.combo = combo

        if len(combo) != self.num_states:
            raise ValueError("(simple_rl.ComboLockMDP Error): Combo length (" + str(len(combo)) + ") must be the same as num_states (" + str(self.num_states) + ").")
        elif max(combo) > num_actions:
            raise ValueError("(simple_rl.ComboLockMDP Error): Combo (" + str(combo) + ") must only contain values less than or equal to @num_actions (" + str(num_actions) +").")

        MDP.__init__(self, ComboLockMDP.ACTIONS, self._transition_func, self._reward_func, init_state=ChainState(1), gamma=gamma)
Example #36
 def __init__(self, width=8, height=7, init_loc=(1,1), rocks=None, gamma=0.99, slip_prob=0.00, rock_rewards=[0.1, 1, 20], name="rocksample"):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         rocks (list of [x (int), y (int), is_good (bool)] triples)
         rock_rewards (list of floats)
     '''
     if rocks is None:
         rocks = [[1,2,True], [5,4,True], [6,7,True], [1,3,True], [4,5,True], [2,7,False], [2,2,True], [7,4,False]]
     self.init_loc = init_loc
     self.init_rocks = rocks
     self.rock_rewards = rock_rewards
     self.name = name + "-" + str(len(rocks))
     self.width = width
     self.height = height
     MDP.__init__(self, RockSampleMDP.ACTIONS, self._transition_func, self._reward_func, init_state=self.get_init_state(), gamma=gamma)
Example #37
    def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
        '''
        Args:
            num_pegs (int)
            num_discs (int)
            gamma (float)
        '''
        self.num_pegs = num_pegs
        self.num_discs = num_discs
        HanoiMDP.ACTIONS = [str(x) + str(y) for x, y in itertools.product(range(self.num_pegs), range(self.num_pegs)) if x != y]

        # Setup init state.
        init_state = [" " for peg in range(num_pegs)]
        x = ""
        for i in range(num_discs):
            x += chr(97 + i)
        init_state[0] = x
        init_state = State(data=init_state)

        MDP.__init__(self, HanoiMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)
Example #38
 def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99):
     MDP.__init__(self, actions, transition_func, reward_func, init_state=init_state, gamma=gamma)
Example #39
 def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, num_agents=2):
     MDP.__init__(self, actions, transition_func, reward_func, init_state=init_state, gamma=gamma)
     self.num_agents = num_agents