def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf):
    """
    Constructor.

    Args:
        p (np.ndarray): transition probability matrix;
        rew (np.ndarray): reward matrix;
        mu (np.ndarray, None): initial state probability distribution;
        gamma (float, .9): discount factor;
        horizon (int, np.inf): the horizon.
    """
    # Both matrices must be indexed the same way (state, action, ...),
    # and mu must have one entry per state.
    assert p.shape == rew.shape
    assert mu is None or p.shape[0] == mu.size

    # MDP parameters
    self.p = p
    self.r = rew
    self.mu = mu

    # MDP properties
    observation_space = spaces.Discrete(p.shape[0])
    action_space = spaces.Discrete(p.shape[1])
    # NOTE: the original contained the no-op self-assignments
    # ``horizon = horizon`` and ``gamma = gamma``; they are dropped here
    # since the parameters are used directly below.
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
    """
    Constructor.

    Args:
        height (int, 3): number of rows of the grid;
        width (int, 3): number of columns of the grid;
        goal (tuple, (0, 2)): (row, column) of the goal cell;
        start (tuple, (2, 0)): (row, column) of the starting cell.
    """
    # MDP properties: one discrete state per grid cell, four actions.
    n_states = height * width
    mdp_info = MDPInfo(spaces.Discrete(n_states), spaces.Discrete(4),
                       .95, np.inf)

    super().__init__(mdp_info, height, width, start, goal)
def __init__(self, horizon=100, gamma=.95):
    """
    Constructor.

    Args:
        horizon (int, 100): horizon of the problem;
        gamma (float, .95): discount factor.
    """
    # MDP parameters
    self.max_pos = 1.
    self.max_velocity = 3.
    self._g = 9.81
    self._m = 1.
    self._dt = .1
    self._discrete_actions = [-4., 4.]

    # MDP properties: (position, velocity) box, two discrete actions.
    bound = np.array([self.max_pos, self.max_velocity])
    mdp_info = MDPInfo(spaces.Box(low=-bound, high=bound),
                       spaces.Discrete(2), gamma, horizon)

    # Visualization
    self._viewer = Viewer(1, 1)

    super().__init__(mdp_info)
def __init__(self, natural=False, box=True):
    """
    Constructor.

    Args:
        natural (bool, False): whether a "natural" blackjack win pays out
            1.5x, as in casino rules;
        box (bool, True): if True, observations live in a 3-dimensional
            Box in [0, 1]; otherwise a single Discrete index over the
            flattened [32, 11, 2] space.
    """
    self.action_space = spaces.Discrete(2)
    self.box = box
    if box:
        self.observation_space = spaces.Box(low=np.zeros(3),
                                            high=np.ones(3))
    else:
        # 32 x 11 x 2 — presumably (player sum, dealer card, usable ace);
        # TODO confirm against the rest of the class.
        self.dims = dims = [32, 11, 2]
        self.observation_space = spaces.Discrete(np.prod(dims))
    # NOTE(review): reconstructed from a collapsed source line — the
    # placement of the next three statements outside the if/else is
    # assumed; verify. Also, when box=True, ``self.observation_space`` is
    # a Box, which may not expose ``.n`` — confirm this path works.
    ob_space = mushroom_spaces.Discrete(self.observation_space.n)
    ac_space = mushroom_spaces.Discrete(2)
    self._mdp_info = MDPInfo(ob_space, ac_space, 1., np.inf)
    # spaces.Tuple((
    #     spaces.Discrete(32),
    #     spaces.Discrete(11),
    #     spaces.Discrete(2)))
    self.seed()

    # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
    # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
    self.natural = natural

    # Start the first game
    self.reset()
def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50.,
             noise_u=10., horizon=3000, gamma=.95):
    """
    Constructor.

    Args:
        m (float, 2.0): mass of the pendulum;
        M (float, 8.0): mass of the cart;
        l (float, .5): length of the pendulum;
        g (float, 9.8): gravity acceleration constant;
        mu (float, 1e-2): friction coefficient (presumed from the name;
            not documented in the original — confirm against the
            dynamics);
        max_u (float, 50.): maximum allowed input torque;
        noise_u (float, 10.): maximum noise on the action;
        horizon (int, 3000): horizon of the problem;
        gamma (float, .95): discount factor.
    """
    # MDP parameters
    self._m = m
    self._M = M
    self._l = l
    self._g = g
    # Precomputed inverse of the total mass, reused by the dynamics.
    self._alpha = 1 / (self._m + self._M)
    self._mu = mu
    self._dt = .1
    self._max_u = max_u
    self._noise_u = noise_u
    high = np.array([np.inf, np.inf])

    # MDP properties: unbounded 2-D observation, three discrete actions.
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(3)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(2.5 * l, 2.5 * l)
    self._last_u = None
    self._state = None

    super().__init__(mdp_info)
def __init__(self, items, gamma, horizon, trans_model_abs_path,
             item_dist=None):
    """
    Constructor.

    Args:
        items (np.ndarray): either a 1-D array of item names / integer
            ids (discrete actions), or a 2-D array of (min, max) pairs
            per action dimension (continuous actions);
        gamma (float): discount factor;
        horizon (int): time limit of an episode;
        trans_model_abs_path (str): absolute path of the transition
            model loaded via ModelMaker;
        item_dist (np.ndarray, None): prior distribution over items; if
            None and actions are discrete, a uniform distribution is
            built (skipping the 'none' item when present).
    """
    # MDP parameters
    # 1) discrete actions: list of item names or representing integers
    # 2) actions on n-dimensional space: list of a pair of min and max
    #    values per action
    self.items = items
    self.action_dim = len(self.items)
    # Hoisted: 1-D items array means a discrete action set.
    discrete_actions = len(self.items.shape) == 1

    if item_dist is None:
        if discrete_actions:
            if 'none' in self.items:
                # Reserve probability 0 for the 'none' item and spread
                # the remainder uniformly over the real items.
                self.item_dist = np.zeros(self.action_dim)
                self.item_dist[1:] = 1 / (self.action_dim - 1)
            else:
                # NOTE(review): this branch yields a scalar while the one
                # above builds an array — confirm downstream code accepts
                # both representations.
                self.item_dist = 1 / self.action_dim
        else:
            self.item_dist = None
    else:
        self.item_dist = item_dist

    self.gamma = gamma  # discount factor
    self.horizon = horizon  # episode time limit

    # Infer the state dimensionality from the transition model's first
    # linear layer: its input is the concatenation of state and action
    # features, so state_dim = in_features - action_dim.
    self.trans_model = ModelMaker(FlexibleTorchModel,
                                  model_dir_path=trans_model_abs_path)
    self.trans_model_params = self.trans_model.model.state_dict()
    param_keys = list(self.trans_model_params.keys())
    key = list(filter(lambda x: '0.weight' in x, param_keys))[0]
    self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim
    if 'none' in self.items:
        self.state_dim += 1

    # Observation bounds: a fixed [-100, 100] hypercube per dimension.
    MM_VAL = 100
    self.min_point = np.ones(self.state_dim) * -MM_VAL
    self.max_point = np.ones(self.state_dim) * MM_VAL

    self._discrete_actions = (list(range(self.action_dim))
                              if discrete_actions else None)

    # MDP properties
    observation_space = spaces.Box(low=self.min_point, high=self.max_point)
    if discrete_actions:
        action_space = spaces.Discrete(self.action_dim)
    else:
        action_space = spaces.Box(low=self.items[0][0],
                                  high=self.items[0][1])
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
def __init__(self, m, g, a, horizon=100, gamma=.95):
    """
    Constructor.

    Args:
        m (float): mass of the system;
        g (float): gravity acceleration constant;
        a (float): magnitude of the two discrete actions (-a, a);
        horizon (int, 100): horizon of the problem;
        gamma (float, .95): discount factor.
    """
    # MDP parameters
    self.max_pos = 1.
    self.max_velocity = 3.
    high = np.array([self.max_pos, self.max_velocity])
    self._g = g
    self._m = m
    self._dt = .1
    self._discrete_actions = [-a, a]

    # MDP properties: (position, velocity) box, two discrete actions.
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(2)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)