示例#1
0
    def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf):
        """
        Constructor.

        Args:
            p (np.ndarray): transition probability matrix;
            rew (np.ndarray): reward matrix;
            mu (np.ndarray, None): initial state probability distribution;
            gamma (float, .9): discount factor;
            horizon (int, np.inf): the horizon.

        """
        # Transitions and rewards must be indexed identically, hence the
        # same shape; mu, when given, needs one entry per state.
        assert p.shape == rew.shape
        assert mu is None or p.shape[0] == mu.size

        # MDP parameters
        self.p = p
        self.r = rew
        self.mu = mu

        # MDP properties: one discrete state per row of p, one discrete
        # action per column. (The redundant ``horizon = horizon`` and
        # ``gamma = gamma`` no-op self-assignments were removed.)
        observation_space = spaces.Discrete(p.shape[0])
        action_space = spaces.Discrete(p.shape[1])
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
示例#2
0
    def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
        """
        Constructor.

        Args:
            height (int, 3): number of rows of the grid;
            width (int, 3): number of columns of the grid;
            goal (tuple, (0, 2)): coordinates of the goal cell;
            start (tuple, (2, 0)): coordinates of the starting cell.

        """
        # MDP properties: one discrete state per grid cell, four move
        # actions, undiscounted-ish episodic setting (gamma=.95, no
        # time limit).
        n_states = height * width
        observation_space = spaces.Discrete(n_states)
        action_space = spaces.Discrete(4)
        mdp_info = MDPInfo(observation_space, action_space, .95, np.inf)

        super().__init__(mdp_info, height, width, start, goal)
示例#3
0
    def __init__(self, horizon=100, gamma=.95):
        """
        Constructor.

        Args:
            horizon (int, 100): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters: fixed physics constants and the two discrete
        # control values available to the agent.
        self._g = 9.81
        self._m = 1.
        self._dt = .1
        self._discrete_actions = [-4., 4.]
        self.max_pos = 1.
        self.max_velocity = 3.

        # MDP properties: the observation is a (position, velocity) pair
        # bounded symmetrically; the action picks one of the two forces.
        bounds = np.array([self.max_pos, self.max_velocity])
        observation_space = spaces.Box(low=-bounds, high=bounds)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(1, 1)

        super().__init__(mdp_info)
    def __init__(self, natural=False, box=True):
        """
        Constructor.

        Args:
            natural (bool, False): if True, a "natural" blackjack win pays
                out 1.5, like casino rules;
            box (bool, True): if True, observations are a continuous
                3-dimensional Box in [0, 1]; otherwise the three discrete
                state components are flattened into one Discrete index.

        """
        self.action_space = spaces.Discrete(2)
        self.box = box
        if box:
            # Continuous encoding: 3 state components normalized to [0, 1].
            self.observation_space = spaces.Box(low=np.zeros(3),
                                                high=np.ones(3))
        else:
            # Discrete encoding with component sizes 32, 11 and 2
            # (presumably player sum, dealer card and usable-ace flag,
            # matching the commented-out Tuple below — confirm), flattened
            # into a single Discrete index of size prod(dims).
            self.dims = dims = [32, 11, 2]
            self.observation_space = spaces.Discrete(np.prod(dims))
            # Mirror of the spaces in mushroom's own space types, used to
            # build the MDPInfo (gamma=1, infinite horizon).
            ob_space = mushroom_spaces.Discrete(self.observation_space.n)
            ac_space = mushroom_spaces.Discrete(2)
            self._mdp_info = MDPInfo(ob_space, ac_space, 1., np.inf)
        # spaces.Tuple((
        #     spaces.Discrete(32),
        #     spaces.Discrete(11),
        #     spaces.Discrete(2)))
        self.seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural

        # Start the first game
        self.reset()
示例#5
0
    def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50.,
                 noise_u=10., horizon=3000, gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction coefficient;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._mu = mu
        self._alpha = 1 / (self._m + self._M)  # inverse of the total mass
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u

        # MDP properties: unbounded 2-dimensional observation, three
        # discrete actions.
        obs_limit = np.array([np.inf, np.inf])
        observation_space = spaces.Box(low=-obs_limit, high=obs_limit)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)
示例#6
0
    def __init__(self, items, gamma, horizon, trans_model_abs_path, item_dist=None):
        """
        Constructor.

        Args:
            items (np.ndarray): either a 1-D array of discrete item
                identifiers (possibly containing 'none'), or a 2-D array of
                (min, max) bounds describing a continuous action space;
            gamma (float): discount factor;
            horizon (int): time limit of an episode;
            trans_model_abs_path (str): absolute path of the saved
                transition model loaded via ModelMaker;
            item_dist (np.ndarray, None): distribution over items; if None,
                a uniform distribution is built in the discrete case.

        """
        # MDP parameters

        # 1) discrete actions: list of item names or representing integers
        # 2) actions on n-dimensional space: list of a pair of min and max values per action
        self.items = items
        self.action_dim = len(self.items)
        if item_dist is None:
            if len(self.items.shape) == 1:
                if 'none' in self.items:
                    # Give the 'none' item zero probability (assumes it sits
                    # at index 0 — TODO confirm) and spread the remaining
                    # mass uniformly over the other items.
                    self.item_dist = np.zeros(self.action_dim)
                    self.item_dist[1:] = 1/(self.action_dim-1)
                else:
                    # NOTE(review): this branch stores a scalar while the
                    # branch above stores an ndarray — callers must accept
                    # both forms.
                    self.item_dist = 1/(self.action_dim)
            else:
                # Continuous action space: no discrete item distribution.
                self.item_dist = None
        else:
            self.item_dist = item_dist
        self.gamma = gamma    ## discount factor
        self.horizon = horizon    ## episode time limit
        # Load the pretrained transition model and infer the state
        # dimensionality from the first layer's weight matrix: its input
        # width is state_dim + action_dim.
        self.trans_model = ModelMaker(FlexibleTorchModel, model_dir_path=trans_model_abs_path)
        self.trans_model_params = self.trans_model.model.state_dict()
        tmp = list(self.trans_model_params.keys())
        key = list(filter(lambda x: '0.weight' in x, tmp))[0]
        self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim
        if 'none' in self.items:
            # Presumably the 'none' item has no input column in the model,
            # so one extra column belongs to the state — TODO confirm.
            self.state_dim += 1

        # Arbitrary large symmetric bounds for the observation space.
        MM_VAL = 100
        self.min_point = np.ones(self.state_dim) * -MM_VAL
        self.max_point = np.ones(self.state_dim) * MM_VAL
        
        if len(self.items.shape) == 1:
            self._discrete_actions = list(range(self.action_dim))
        else:
            self._discrete_actions = None

        # MDP properties
        observation_space = spaces.Box(low=self.min_point, high=self.max_point)
        if len(self.items.shape) == 1:
            action_space = spaces.Discrete(self.action_dim)
        else:
            # NOTE(review): only the first row's (min, max) pair bounds the
            # Box — verify this is intended when rows differ.
            action_space = spaces.Box(low=self.items[0][0], high=self.items[0][1])
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
示例#7
0
    def __init__(self, m, g, a, horizon=100, gamma=.95):
        """
        Constructor.

        Args:
            m (float): mass;
            g (float): gravity acceleration constant;
            a (float): magnitude of the two discrete action values;
            horizon (int, 100): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._g = g
        self._m = m
        self._dt = .1
        self._discrete_actions = [-a, a]
        self.max_pos = 1.
        self.max_velocity = 3.

        # MDP properties: symmetric box over (position, velocity), two
        # discrete actions.
        limits = np.array([self.max_pos, self.max_velocity])
        observation_space = spaces.Box(low=-limits, high=limits)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)