def __init__(self, goal_velocity=0):
    # init base classes
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5
    self.goal_velocity = goal_velocity

    self.force = 0.001
    self.gravity = 0.0025

    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high)
    self.reward_range = (0.0, 1.0)

    # rendering info
    self.set_clipping_area((-1.2, 0.6, -0.2, 1.1))
    self.set_refresh_interval(10)  # in milliseconds

    # initial reset
    self.reset()
def __init__(self, _env, n_bins):
    # initialize base class
    super().__init__(_env)
    self.n_bins = n_bins

    # initialize bins
    assert n_bins > 0, "DiscretizeStateWrapper requires n_bins > 0"
    tol = 1e-8
    self.dim = len(self.env.observation_space.low)
    n_states = n_bins**self.dim
    self._bins = []
    self._open_bins = []
    for dd in range(self.dim):
        range_dd = (
            self.env.observation_space.high[dd]
            - self.env.observation_space.low[dd]
        )
        epsilon = range_dd / n_bins
        bins_dd = []
        for bb in range(n_bins + 1):
            val = self.env.observation_space.low[dd] + epsilon * bb
            bins_dd.append(val)
        self._open_bins.append(tuple(bins_dd[1:]))
        bins_dd[-1] += tol  # "close" the last interval
        self._bins.append(tuple(bins_dd))

    # set observation space
    self.observation_space = spaces.Discrete(n_states)

    # list of discretized states
    self.discretized_states = np.zeros((self.dim, n_states))
    for ii in range(n_states):
        self.discretized_states[:, ii] = self.get_continuous_state(ii, False)
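# A minimal usage sketch for the DiscretizeStateWrapper constructor above. It
# assumes a continuous-state environment with a 2-dimensional Box observation
# space, such as the mountain-car-like environment defined earlier; the class
# name MountainCar is an assumption for illustration only.
env = MountainCar()
discrete_env = DiscretizeStateWrapper(env, n_bins=10)
# With dim = 2 and 10 bins per dimension, the wrapper exposes
# n_states = 10 ** 2 = 100 discrete states.
print(discrete_env.observation_space.n)  # -> 100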
def __init__(self, noise_room1=0.01, noise_room2=0.01):
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    self.noise_room1 = noise_room1
    self.noise_room2 = noise_room2

    self.observation_space = spaces.Box(
        low=np.array([0.0, 0.0]),
        high=np.array([2.0, 1.0]),
    )
    self.action_space = spaces.Discrete(4)
    self.reward_range = (0.0, 1.0)

    self.room_noises = [noise_room1, noise_room2]

    # environment parameters
    self.action_displacement = 0.1
    self.wall_eps = 0.05

    # base reward position
    self.base_reward_pos = np.array([0.8, 0.8])

    # rendering info
    self.set_clipping_area((0, 2, 0, 1))
    self.set_refresh_interval(100)  # in milliseconds
    self.renderer_type = "opengl"

    # reset
    self.reset()
def __init__(self, R, P, initial_state_distribution=0):
    Model.__init__(self)
    self.initial_state_distribution = initial_state_distribution
    S, A = R.shape

    self.S = S
    self.A = A

    self.R = R
    self.P = P

    self.observation_space = spaces.Discrete(S)
    self.action_space = spaces.Discrete(A)
    self.reward_range = (self.R.min(), self.R.max())

    self.state = None

    self._states = np.arange(S)
    self._actions = np.arange(A)

    self.reset()
    self._check()
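# A small worked example for the finite MDP constructor above. It assumes the
# enclosing class is named FiniteMDP (a hypothetical name here) and that P is a
# transition tensor of shape (S, A, S) whose rows P[s, a, :] sum to 1; only the
# R.shape == (S, A) convention is confirmed by the constructor itself.
import numpy as np

S, A = 2, 2
R = np.array([[0.0, 1.0],
              [0.5, 0.0]])           # R[s, a]: reward of action a in state s
P = np.zeros((S, A, S))
P[:, 0, :] = [[1.0, 0.0],            # action 0 sends both states to state 0
              [1.0, 0.0]]
P[:, 1, :] = [[0.0, 1.0],            # action 1 sends both states to state 1
              [0.0, 1.0]]
env = FiniteMDP(R, P, initial_state_distribution=0)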
def __init__(self):
    # init base classes
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    self.reward_range = (-1.0, 0.0)

    # rendering info
    bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2
    # (left, right, bottom, top)
    self.set_clipping_area((-bound, bound, -bound, bound))
    self.set_refresh_interval(10)  # in milliseconds

    # observation and action spaces
    high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
    low = -high
    self.observation_space = spaces.Box(low=low, high=high)
    self.action_space = spaces.Discrete(3)

    # initialize
    self.state = None
    self.reset()
def __init__(
    self,
    p,
    action_list,
    reward_amplitudes,
    reward_smoothness,
    reward_centers,
    A,
    B,
    sigma,
    sigma_init,
    mu_init,
):
    """
    Parameters
    ----------
    p : int
        parameter of the p-norm
    action_list : list
        list of actions {u_1, ..., u_m}, each action u_i is a
        d'-dimensional array
    reward_amplitudes : list
        list of reward amplitudes: {b_1, ..., b_n}
    reward_smoothness : list
        list of reward smoothness: {c_1, ..., c_n}
    reward_centers : list
        list of reward centers: {x_1, ..., x_n}
    A : numpy.ndarray
        array A of size (d, d)
    B : numpy.ndarray
        array B of size (d, d')
    sigma : double
        transition noise sigma
    sigma_init : double
        initial state noise sigma_init
    mu_init : numpy.ndarray
        array of size (d,) containing the mean of the initial state
    """
    Model.__init__(self)

    assert p >= 1, "PBall requires p >= 1"
    if p not in [2, np.inf]:
        logger.warning(
            "For p != 2 and p != np.inf, PBall does not make true "
            "projections onto the lp ball."
        )
    self.p = p
    self.d, self.dp = B.shape  # d and d'
    self.m = len(action_list)
    self.action_list = action_list
    self.reward_amplitudes = reward_amplitudes
    self.reward_smoothness = reward_smoothness
    self.reward_centers = reward_centers
    self.A = A
    self.B = B
    self.sigma = sigma
    self.sigma_init = sigma_init
    self.mu_init = mu_init

    # state and action spaces
    low = -1.0 * np.ones(self.d, dtype=np.float64)
    high = np.ones(self.d, dtype=np.float64)
    self.observation_space = spaces.Box(low, high)
    self.action_space = spaces.Discrete(self.m)

    # reward range
    assert len(self.reward_amplitudes) == len(self.reward_smoothness)
    assert len(self.reward_amplitudes) == len(self.reward_centers)
    if len(self.reward_amplitudes) > 0:
        assert (
            self.reward_amplitudes.max() <= 1.0
            and self.reward_amplitudes.min() >= 0.0
        ), "reward amplitudes b_i must be in [0, 1]"
        assert (
            self.reward_smoothness.min() > 0.0
        ), "reward smoothness c_i must be > 0"
    self.reward_range = (0.0, 1.0)

    # self.name = "Lp-Ball"

    # initialize state
    self.reset()
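# A hedged construction sketch for the Lp-ball environment above. The class
# name PBall follows the assertion messages in the constructor; all concrete
# values below are illustrative assumptions, not defaults from the source.
import numpy as np

p = 2
action_list = [
    np.array([0.05, 0.0]), np.array([-0.05, 0.0]),
    np.array([0.0, 0.05]), np.array([0.0, -0.05]),
]                                            # m = 4 actions, each d' = 2 dimensional
A = np.eye(2)                                # transition matrix, size (d, d)
B = np.eye(2)                                # control matrix, size (d, d')
reward_amplitudes = np.array([1.0])          # b_1 in [0, 1]
reward_smoothness = np.array([0.25])         # c_1 > 0
reward_centers = [np.array([0.75, 0.0])]     # x_1 inside the unit ball
env = PBall(
    p, action_list,
    reward_amplitudes, reward_smoothness, reward_centers,
    A, B,
    sigma=0.01, sigma_init=0.001, mu_init=np.zeros(2),
)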
def __init__(self, rewards=[], **kwargs):
    Model.__init__(self, **kwargs)
    # rewards is expected to be a 2D array whose second dimension indexes
    # the arms; convert it up front so that .shape is always available.
    rewards = np.asarray(rewards)
    self.n_arms = rewards.shape[1]
    # store the reward rows in a deque
    self.rewards = deque(rewards)
    self.action_space = spaces.Discrete(self.n_arms)
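# A minimal usage sketch for the reward-table bandit constructor above. The
# class name DeterministicBandit is a hypothetical stand-in for the enclosing
# class; each row of `rewards` holds the reward of every arm at one round.
import numpy as np

rewards = np.array([
    [0.0, 1.0, 0.5],
    [0.1, 0.9, 0.4],
    [0.2, 0.8, 0.3],
])                                # 3 rounds, 3 arms
bandit = DeterministicBandit(rewards=rewards)
print(bandit.n_arms)              # -> 3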
def __init__(self, laws=[], **kwargs):
    Model.__init__(self, **kwargs)
    # one reward distribution ("law") per arm
    self.laws = laws
    self.n_arms = len(self.laws)
    self.action_space = spaces.Discrete(self.n_arms)
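# A hedged usage sketch for the stochastic bandit constructor above. It assumes
# each law is a frozen scipy.stats distribution that can later be sampled with
# .rvs(); the constructor itself only stores the laws, so this is an assumption.
# The class name StochasticBandit is hypothetical.
from scipy.stats import bernoulli

laws = [bernoulli(0.1), bernoulli(0.5), bernoulli(0.9)]  # one law per arm
bandit = StochasticBandit(laws=laws)
print(bandit.n_arms)  # -> 3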