Example #1
    def __init__(self, goal_velocity=0):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high)

        self.reward_range = (0.0, 1.0)

        # rendering info
        self.set_clipping_area((-1.2, 0.6, -0.2, 1.1))
        self.set_refresh_interval(10)  # in milliseconds

        # initial reset
        self.reset()
Example #2
    def __init__(self, _env, n_bins):
        # initialize base class
        super().__init__(_env)

        self.n_bins = n_bins
        # initialize bins
        assert n_bins > 0, "DiscretizeStateWrapper requires n_bins > 0"
        tol = 1e-8
        self.dim = len(self.env.observation_space.low)
        n_states = n_bins**self.dim
        self._bins = []
        self._open_bins = []
        for dd in range(self.dim):
            range_dd = (
                self.env.observation_space.high[dd] - self.env.observation_space.low[dd]
            )
            epsilon = range_dd / n_bins
            bins_dd = []
            for bb in range(n_bins + 1):
                val = self.env.observation_space.low[dd] + epsilon * bb
                bins_dd.append(val)
            self._open_bins.append(tuple(bins_dd[1:]))
            bins_dd[-1] += tol  # "close" the last interval
            self._bins.append(tuple(bins_dd))

        # set observation space
        self.observation_space = spaces.Discrete(n_states)

        # List of discretized states
        self.discretized_states = np.zeros((self.dim, n_states))
        for ii in range(n_states):
            self.discretized_states[:, ii] = self.get_continuous_state(ii, False)
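For illustration, here is a minimal sketch of how the per-dimension edges stored in self._bins could map a continuous observation to one of the n_bins ** dim discrete states. The helper name and the row-major flattening order are assumptions and are not part of the wrapper above:

    import numpy as np

    def to_discrete_index(obs, bins, n_bins):
        # Hypothetical helper: find the interval containing each coordinate
        # (intervals treated as closed on the right, as suggested by the
        # _open_bins bookkeeping above), then flatten the per-dimension
        # indices row-major into one integer in [0, n_bins ** dim).
        index = 0
        for dd, val in enumerate(obs):
            bb = int(np.searchsorted(bins[dd], val, side="left")) - 1
            bb = min(max(bb, 0), n_bins - 1)  # clip values outside the box
            index = index * n_bins + bb
        return index

Called as to_discrete_index(obs, wrapper._bins, wrapper.n_bins), this would return an integer compatible with the spaces.Discrete(n_states) observation space set above.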
Example #3
    def __init__(self, noise_room1=0.01, noise_room2=0.01):
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        self.noise_room1 = noise_room1
        self.noise_room2 = noise_room2

        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0]),
            high=np.array([2.0, 1.0]),
        )
        self.action_space = spaces.Discrete(4)
        self.reward_range = (0.0, 1.0)

        self.room_noises = [noise_room1, noise_room2]

        # environment parameters
        self.action_displacement = 0.1
        self.wall_eps = 0.05

        # base reward position
        self.base_reward_pos = np.array([0.8, 0.8])

        # rendering info
        self.set_clipping_area((0, 2, 0, 1))
        self.set_refresh_interval(100)  # in milliseconds
        self.renderer_type = "opengl"

        # reset
        self.reset()
Example #4
    def __init__(self, R, P, initial_state_distribution=0):
        Model.__init__(self)
        self.initial_state_distribution = initial_state_distribution
        S, A = R.shape

        self.S = S
        self.A = A

        self.R = R
        self.P = P

        self.observation_space = spaces.Discrete(S)
        self.action_space = spaces.Discrete(A)
        self.reward_range = (self.R.min(), self.R.max())

        self.state = None

        self._states = np.arange(S)
        self._actions = np.arange(A)

        self.reset()
        self._check()
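As a usage sketch: the constructor above only shows that R has shape (S, A); the (S, A, S) layout of P and the FiniteMDP class name below are assumptions. A toy two-state chain could be built like this:

    import numpy as np

    # Toy 2-state, 2-action MDP. R has shape (S, A) as in the
    # constructor above; P is assumed to hold transition probabilities
    # P[s, a, s'], so each P[s, a, :] must sum to 1.
    R = np.array([[0.0, 0.1],
                  [0.5, 1.0]])
    P = np.zeros((2, 2, 2))
    P[:, 0, :] = np.eye(2)          # action 0: stay in place
    P[:, 1, :] = np.eye(2)[::-1]    # action 1: swap states
    # env = FiniteMDP(R, P)         # hypothetical class name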
Example #5
    def __init__(self):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)
        self.reward_range = (-1.0, 0.0)

        # rendering info
        bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2
        # (left, right, bottom, top)
        self.set_clipping_area((-bound, bound, -bound, bound))
        self.set_refresh_interval(10)  # in milliseconds

        # observation and action spaces
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low=low, high=high)
        self.action_space = spaces.Discrete(3)

        # initialize
        self.state = None
        self.reset()
Example #6
    def __init__(self, p, action_list, reward_amplitudes, reward_smoothness,
                 reward_centers, A, B, sigma, sigma_init, mu_init):
        """
        Parameters
        ----------
        p : int
            parameter of the p-norm
        action_list : list
            list of actions {u_1, ..., u_m}, each action u_i is a
            d'-dimensional array
        reward_amplitudes : list
            list of reward amplitudes: {b_1, ..., b_n}
        reward_smoothness : list
            list of reward smoothness: {c_1, ..., c_n}
        reward_centers : list
            list of reward centers:    {x_1, ..., x_n}
        A : numpy.ndarray
            array A of size (d, d)
        B : numpy.ndarray
            array B of size (d, d')
        sigma : double
            transition noise sigma
        sigma_init : double
            initial state noise sigma_init
        mu_init : numpy.ndarray
            array of size (d,) containing the mean of the initial state
        """
        Model.__init__(self)

        assert p >= 1, "PBall requires p>=1"
        if p not in [2, np.inf]:
            logger.warning(
                "For p other than 2 and np.inf, PBall does not make "
                "true projections onto the lp ball."
            )
        self.p = p
        self.d, self.dp = B.shape  # d and d'
        self.m = len(action_list)
        self.action_list = action_list
        self.reward_amplitudes = reward_amplitudes
        self.reward_smoothness = reward_smoothness
        self.reward_centers = reward_centers
        self.A = A
        self.B = B
        self.sigma = sigma
        self.sigma_init = sigma_init
        self.mu_init = mu_init

        # State and action spaces
        low = -1.0 * np.ones(self.d, dtype=np.float64)
        high = np.ones(self.d, dtype=np.float64)
        self.observation_space = spaces.Box(low, high)
        self.action_space = spaces.Discrete(self.m)

        # reward range
        assert len(self.reward_amplitudes) == len(self.reward_smoothness)
        assert len(self.reward_amplitudes) == len(self.reward_centers)
        if len(self.reward_amplitudes) > 0:
            assert self.reward_amplitudes.max() <= 1.0 and \
                self.reward_amplitudes.min() >= 0.0, \
                "reward amplitudes b_i must be in [0, 1]"
            assert self.reward_smoothness.min() > 0.0, \
                "reward smoothness c_i must be > 0"
        self.reward_range = (0.0, 1.0)

        # environment name
        self.name = "Lp-Ball"

        # Initialize state
        self.reset()
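For reference, a sketch of arguments consistent with the docstring above, with d = 2, d' = 1, and a single reward bump. The numeric values are illustrative only; amplitudes and smoothness are passed as arrays so that the .max()/.min() checks in the constructor work:

    import numpy as np

    A = 0.9 * np.eye(2)                      # (d, d) dynamics matrix
    B = np.array([[1.0], [0.0]])             # (d, d') action matrix
    action_list = [np.array([-0.1]), np.array([0.1])]
    reward_amplitudes = np.array([1.0])      # b_i in [0, 1]
    reward_smoothness = np.array([0.25])     # c_i > 0
    reward_centers = [np.array([0.5, 0.5])]
    # env = PBall(p=2, action_list=action_list,
    #             reward_amplitudes=reward_amplitudes,
    #             reward_smoothness=reward_smoothness,
    #             reward_centers=reward_centers,
    #             A=A, B=B, sigma=0.01, sigma_init=0.1, mu_init=np.zeros(2))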
Example #7
    def __init__(self, rewards, **kwargs):
        Model.__init__(self, **kwargs)
        # rewards: 2D array of shape (n_rounds, n_arms)
        self.n_arms = rewards.shape[1]
        self.rewards = deque(rewards)
        self.action_space = spaces.Discrete(self.n_arms)
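The constructor above reads the number of arms from rewards.shape[1] and consumes rows through a deque, so rewards is expected to be a 2D array with one row per round and one column per arm. A minimal sketch, with a hypothetical class name:

    import numpy as np

    # 5 rounds of pre-generated rewards for a 3-armed problem:
    # shape (n_rounds, n_arms), so rewards.shape[1] == 3.
    rng = np.random.default_rng(0)
    rewards = rng.uniform(size=(5, 3))
    # env = AdversarialRewardsBandit(rewards=rewards)  # hypothetical name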
Example #8
    def __init__(self, laws=(), **kwargs):
        Model.__init__(self, **kwargs)
        # laws: one reward distribution per arm
        self.laws = laws
        self.n_arms = len(self.laws)
        self.action_space = spaces.Discrete(self.n_arms)
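This snippet only requires that laws be a sequence with one entry per arm. One common choice, assumed here rather than shown above, is a list of frozen scipy.stats distributions that the environment can later sample from:

    from scipy import stats

    # One reward distribution per arm; len(laws) gives n_arms = 2.
    laws = [stats.norm(loc=0.0, scale=1.0), stats.bernoulli(0.3)]
    # env = StochasticBandit(laws=laws)  # hypothetical class name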