Пример #1
0
    def __init__(self,
                 reward_free=False,
                 difficulty=0,
                 array_observation=False):
        self.reward_free = reward_free
        self.difficulty = difficulty
        self.array_observation = array_observation

        if difficulty not in [0, 1, 2]:
            raise ValueError("FourRoom difficulty must be in [0, 1, 2]")

        # Common parameters
        nrows = 9
        ncols = 9
        start_coord = (0, 0)
        terminal_states = ((8, 0), )
        success_probability = 0.95
        #
        walls = ()
        for ii in range(9):
            if ii not in [2, 6]:
                walls += ((ii, 4), )
        for jj in range(9):
            if jj != 7:
                walls += ((4, jj), )

        # Default reward according to the difficulty
        if difficulty in [0, 1]:
            default_reward = 0.0
        elif difficulty == 2:
            default_reward = -0.005

        # Rewards according to the difficulty
        if self.reward_free:
            reward_at = {}
        else:
            if difficulty == 0:
                reward_at = {(8, 0): 1.0}
            elif difficulty in [1, 2]:
                reward_at = {
                    (8, 0): 1.0,
                    (3, 3): 0.1,
                }

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=default_reward,
        )

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
Пример #2
0
    def __init__(self, reward_free=False, array_observation=False):
        self.reward_free = reward_free
        self.array_observation = array_observation

        # Common parameters
        nrows = 13
        ncols = 17
        start_coord = (5, 1)
        terminal_states = ((7, 7),)
        success_probability = 0.95
        #
        walls = ()
        for ii in range(13):
            walls += ((ii, 0),)
            walls += ((ii, 16),)
        for jj in range(17):
            walls += ((0, jj),)
            walls += ((12, jj),)
        for ii in range(13):
            if ii not in [1, 11]:
                walls += ((ii, 6),)
                walls += ((ii, 10),)
        walls += ((11, 6),)
        for jj in range(17):
            if jj not in [1, 15]:
                walls += ((6, jj),)

        # Default reward according to the difficulty
        default_reward = 0

        # Rewards according to the difficulty
        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                        (7, 7): 10.0,
                        (8, 2): 1.0,
                        (10, 3): 1.0
                        }
            for jj in range(7, 16):
                for ii in range(1, 12):
                    if (ii, jj) not in walls and (ii, jj) != (7, 7):
                        reward_at[(ii, jj)] = -0.05

        # Init base class
        GridWorld.__init__(self,
                           nrows=nrows,
                           ncols=ncols,
                           start_coord=start_coord,
                           terminal_states=terminal_states,
                           success_probability=success_probability,
                           reward_at=reward_at,
                           walls=walls,
                           default_reward=default_reward)

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
Пример #3
0
    def __init__(self, reward_free=False, array_observation=False):
        self.reward_free = reward_free
        self.array_observation = array_observation

        # Common parameters
        nrows = 11
        ncols = 17
        start_coord = (0, 0)
        terminal_states = ((10, 0), )
        success_probability = 0.95
        #
        walls = ()
        for ii in range(11):
            if ii not in [2, 8]:
                walls += ((ii, 5), )
                walls += ((ii, 11), )
        for jj in range(17):
            if jj != 15:
                walls += ((5, jj), )

        # Default reward according to the difficulty
        default_reward = -0.001

        # Rewards according to the difficulty
        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                (10, 0): 10.0,
                (4, 4): 0.1,
            }

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=default_reward,
        )

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
Пример #4
0
    def __init__(
        self,
        nrooms=7,
        reward_free=False,
        array_observation=False,
        room_size=5,
        success_probability=0.95,
        remove_walls=False,
        initial_state_distribution="center",
        include_traps=False,
    ):

        assert nrooms > 0, "nrooms must be > 0"
        assert initial_state_distribution in ("center", "uniform")

        self.reward_free = reward_free
        self.array_observation = array_observation
        self.nrooms = nrooms
        self.room_size = room_size
        self.success_probability = success_probability
        self.remove_walls = remove_walls
        self.initial_state_distribution = initial_state_distribution
        self.include_traps = include_traps

        # Max number of rooms/columns per row
        self.max_rooms_per_row = 5

        # Room size (default = 5x5)
        self.room_size = room_size

        # Grid size
        self.room_nrows = math.ceil(nrooms / self.max_rooms_per_row)
        if self.room_nrows > 1:
            self.room_ncols = self.max_rooms_per_row
        else:
            self.room_ncols = nrooms
        nrows = self.room_size * self.room_nrows + (self.room_nrows - 1)
        ncols = self.room_size * self.room_ncols + (self.room_ncols - 1)

        # # walls
        walls = []
        for room_col in range(self.room_ncols - 1):
            col = (room_col + 1) * (self.room_size + 1) - 1
            for jj in range(nrows):
                if (jj % (self.room_size + 1)) != (self.room_size // 2):
                    walls.append((jj, col))

        for room_row in range(self.room_nrows - 1):
            row = (room_row + 1) * (self.room_size + 1) - 1
            for jj in range(ncols):
                walls.append((row, jj))

        # process each room
        start_coord = None
        terminal_state = None
        self.traps = []
        count = 0
        for room_r in range(self.room_nrows):
            if room_r % 2 == 0:
                cols_iterator = range(self.room_ncols)
            else:
                cols_iterator = reversed(range(self.room_ncols))
            for room_c in cols_iterator:
                # existing rooms
                if count < self.nrooms:
                    # remove top wall
                    if ((room_c == self.room_ncols - 1) and (room_r % 2 == 0)) or (
                        (room_c == 0) and (room_r % 2 == 1)
                    ):
                        if room_r != self.room_nrows - 1:
                            wall_to_remove = self._convert_room_coord_to_global(
                                room_r, room_c, self.room_size, self.room_size // 2
                            )
                            if wall_to_remove in walls:
                                walls.remove(wall_to_remove)
                # rooms to remove
                else:
                    for ii in range(-1, self.room_size + 1):
                        for jj in range(-1, self.room_size + 1):
                            wall_to_include = self._convert_room_coord_to_global(
                                room_r, room_c, ii, jj
                            )
                            if (
                                wall_to_include[0] >= 0
                                and wall_to_include[0] < nrows
                                and wall_to_include[1] >= 0
                                and wall_to_include[1] < ncols
                                and (wall_to_include not in walls)
                            ):
                                walls.append(wall_to_include)
                    pass

                # start coord
                if count == nrooms // 2:
                    start_coord = self._convert_room_coord_to_global(
                        room_r, room_c, self.room_size // 2, self.room_size // 2
                    )
                # terminal state
                if count == nrooms - 1:
                    terminal_state = self._convert_room_coord_to_global(
                        room_r, room_c, self.room_size // 2, self.room_size // 2
                    )
                # trap
                if include_traps:
                    self.traps.append(
                        self._convert_room_coord_to_global(
                            room_r,
                            room_c,
                            self.room_size // 2 + 1,
                            self.room_size // 2 + 1,
                        )
                    )
                count += 1

        terminal_states = (terminal_state,) + tuple(self.traps)

        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                terminal_state: 1.0,
                start_coord: 0.01,
                (self.room_size // 2, self.room_size // 2): 0.1,
            }

        # Check remove_walls
        if remove_walls:
            walls = ()

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=0.0,
        )

        # Check initial distribution
        if initial_state_distribution == "uniform":
            distr = np.ones(self.observation_space.n) / self.observation_space.n
            self.set_initial_state_distribution(distr)

        # spaces
        if self.array_observation:
            self.discrete_observation_space = self.observation_space
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))