Пример #1
0
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 sigma_learning_rate=None,
                 update_mode='deterministic',
                 update_type='weighted',
                 init_values=(0., 500.),
                 delta=0.1,
                 minimize_wasserstein=True):
        """
        Set up the double-table variant of the Gaussian learner.

        Every argument is forwarded positionally, unchanged, to the parent
        constructor. Afterwards two fresh ensembles of two tables each are
        created in ``self.Qs``; the first is filled with ``init_values`` and
        its tables are copied into both the second ensemble and ``self.Q``.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent; its ``size`` attribute is
                also used to dimension the new tables;
            learning_rate: forwarded to the parent constructor;
            sigma_learning_rate: forwarded to the parent constructor;
            update_mode: forwarded to the parent constructor;
            update_type: forwarded to the parent constructor;
            init_values: per-table constant used to fill the tables of the
                first ensemble (indexed by table position);
            delta: forwarded to the parent constructor;
            minimize_wasserstein: forwarded to the parent constructor.

        """
        super(GaussianDoubleQLearning, self).__init__(
            policy, mdp_info, learning_rate, sigma_learning_rate,
            update_mode, update_type, init_values, delta,
            minimize_wasserstein)

        primary = EnsembleTable(2, mdp_info.size)
        secondary = EnsembleTable(2, mdp_info.size)
        self.Qs = [primary, secondary]

        # Constant-fill each table of the first ensemble; the shape is taken
        # from the matching table of the parent's ensemble.
        for idx in range(len(primary)):
            primary[idx].table = np.tile([init_values[idx]],
                                         self.Q[idx].shape)

        # Give the second ensemble and self.Q their own copies of the
        # freshly initialised tables.
        for idx in range(len(secondary)):
            source_table = primary[idx].table
            secondary[idx].table = source_table.copy()
            self.Q[idx].table = source_table.copy()

        # One independent copy of the parent's learning rate(s) per ensemble.
        self.alpha = [deepcopy(self.alpha) for _ in range(2)]
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 n_approximators=10,
                 mu=0.,
                 sigma=1.,
                 p=1.,
                 cross_update=False):
        """
        Set up the double-table variant of the bootstrapped learner.

        Every argument is forwarded positionally, unchanged, to the parent
        constructor. Afterwards two ensembles of ``n_approximators`` tables
        are created in ``self.Qs``; the first is filled with random normal
        values and its tables are copied into both the second ensemble and
        ``self.Q``.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent; its ``size`` attribute is
                also used to dimension the new tables;
            learning_rate: forwarded to the parent constructor;
            n_approximators: number of tables in each ensemble;
            mu: forwarded to the parent constructor;
            sigma: forwarded to the parent constructor;
            p: forwarded to the parent constructor;
            cross_update: forwarded to the parent constructor.

        """
        super(BootstrappedDoubleQLearning, self).__init__(
            policy, mdp_info, learning_rate, n_approximators, mu, sigma, p,
            cross_update)

        primary = EnsembleTable(n_approximators, mdp_info.size)
        secondary = EnsembleTable(n_approximators, mdp_info.size)
        self.Qs = [primary, secondary]

        # Standard-normal draws, scaled by self._sigma and shifted by
        # self._mu (both expected to have been set by the parent).
        for idx in range(len(primary)):
            noise = np.random.randn(*primary[idx].shape)
            primary[idx].table = noise * self._sigma + self._mu

        # Give the second ensemble and self.Q their own copies of the
        # freshly initialised tables.
        for idx in range(len(secondary)):
            source_table = primary[idx].table
            secondary[idx].table = source_table.copy()
            self.Q[idx].table = source_table.copy()

        # One independent copy of the parent's learning rate(s) per ensemble.
        self.alpha = [deepcopy(self.alpha) for _ in range(2)]
Пример #3
0
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 sigma_learning_rate=None,
                 sigma_1_learning_rate=None,
                 update_mode='deterministic',
                 update_type='weighted',
                 init_values=(0., 0., 500.),
                 delta=0.1,
                 q_max=None,
                 minimize_wasserstein=True,
                 clip_variance=True):
        """
        Build the table ensemble, learning rates and initial policy matrix.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent constructor; its ``size`` and
                ``gamma`` attributes are also read here;
            learning_rate: forwarded to the parent constructor;
            sigma_learning_rate: stored as the third entry of
                ``self.alpha``; a deep copy of ``learning_rate`` is used
                when it is None;
            sigma_1_learning_rate: accepted but not used by this
                constructor;
            update_mode: stored in ``self._update_mode``;
            update_type: stored in ``self._update_type``;
            init_values: one constant per table; its length fixes the
                number of tables in the ensemble. With exactly three
                values, the first also becomes ``self.q_max`` and the last
                is stored as ``self.sigma_b``;
            delta: stored in ``self.delta``; ``1 - delta`` is the
                probability passed to the standard normal quantile function
                to obtain ``self.standard_bound``;
            q_max: upper clipping value for the bounds; defaults to
                ``1 / (1 - mdp_info.gamma)``. Ignored when ``init_values``
                has three entries;
            minimize_wasserstein: stored in ``self.minimize_wasserstein``;
            clip_variance: stored in ``self.clip_variance``; a notice is
                printed when it is truthy.

        """
        self._update_mode = update_mode
        self._update_type = update_type
        self.delta = delta

        self.n_approximators = len(init_values)
        self.Q = EnsembleTable(self.n_approximators, mdp_info.size)

        if self.n_approximators == 3:
            # NOTE(review): an explicitly passed q_max is overridden here;
            # this mirrors the original behaviour.
            q_max = init_values[0]
            self.sigma_b = init_values[-1]
        elif q_max is None:
            q_max = 1 / (1 - mdp_info.gamma)
        # Always set q_max: it is read unconditionally when the bounds are
        # clipped below, whatever the number of approximators.
        self.q_max = q_max

        for i, table in enumerate(self.Q.model):
            table.table = np.tile([init_values[i]], self.Q[i].shape)

        super(Gaussian, self).__init__(self.Q, policy, mdp_info, learning_rate)
        if sigma_learning_rate is None:
            sigma_learning_rate = deepcopy(learning_rate)

        # NOTE(review): sigma_1_learning_rate is never consulted; the second
        # entry reuses the base learning rate.
        self.alpha = [
            deepcopy(self.alpha),
            deepcopy(self.alpha),
            deepcopy(sigma_learning_rate)
        ]
        self.minimize_wasserstein = minimize_wasserstein

        # Initial policy matrix: for every state, spread probability
        # uniformly over the actions whose clipped bound is maximal.
        policy_matrix = np.zeros(self.mdp_info.size)
        self.standard_bound = norm.ppf(1 - self.delta, loc=0, scale=1)
        for s in range(self.mdp_info.size[0]):
            if self.n_approximators == 3:
                means, sigmas1, sigmas2 = [x[[s]] for x in self.Q.model]
                sigmas = sigmas1 + sigmas2
            else:
                means, sigmas = [x[[s]] for x in self.Q.model]
            bounds = np.clip(sigmas * self.standard_bound + means,
                             None, self.q_max)
            actions = np.argwhere(bounds == np.max(bounds)).ravel()
            n = len(actions)
            for a in actions:
                policy_matrix[s, a] = 1. / n
        self.policy_matrix = policy_matrix
        self.last_update = (0, 0)
        self.clip_variance = clip_variance
        if self.clip_variance:
            print("***CLIPPING VARIANCE***")
Пример #4
0
    def __init__(self, policy, mdp_info, learning_rate):
        """
        Create a two-table ensemble and hand it to the parent constructor.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent constructor; its ``size``
                attribute dimensions the tables;
            learning_rate: forwarded to the parent constructor.

        """
        self.Q = EnsembleTable(2, mdp_info.size)
        super().__init__(self.Q, policy, mdp_info, learning_rate)

        # One independent copy of the learning rate per table.
        self.alpha = [deepcopy(self.alpha) for _ in range(2)]

        assert len(self.Q) == 2, (
            'The regressor ensemble must have exactly 2 models.')
Пример #5
0
    def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic',
                 update_type='weighted', q_min=0, q_max=1):
        """
        Set up the double-table variant of the particle learner.

        Every argument is forwarded positionally, unchanged, to the parent
        constructor. Afterwards two ensembles of ``n_approximators`` tables
        are created in ``self.Qs``; table ``i`` of the first one is filled
        with the ``i``-th of ``n_approximators`` evenly spaced values between
        ``q_min`` and ``q_max``, and the tables are then copied into both the
        second ensemble and ``self.Q``.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent; its ``size`` attribute is
                also used to dimension the new tables;
            learning_rate: forwarded to the parent constructor;
            n_approximators: number of tables in each ensemble;
            update_mode: forwarded to the parent constructor;
            update_type: forwarded to the parent constructor;
            q_min: first of the evenly spaced initial values;
            q_max: last of the evenly spaced initial values.

        """
        super(ParticleDoubleQLearning, self).__init__(
            policy, mdp_info, learning_rate, n_approximators, update_mode,
            update_type, q_min, q_max)

        primary = EnsembleTable(n_approximators, mdp_info.size)
        secondary = EnsembleTable(n_approximators, mdp_info.size)
        self.Qs = [primary, secondary]

        levels = np.linspace(q_min, q_max, n_approximators)
        # Constant-fill each table of the first ensemble; the shape is taken
        # from the matching table of the parent's ensemble.
        for idx in range(len(primary)):
            primary[idx].table = np.tile([levels[idx]], self.Q[idx].shape)

        # Give the second ensemble and self.Q their own copies of the
        # freshly initialised tables.
        for idx in range(len(secondary)):
            source_table = primary[idx].table
            secondary[idx].table = source_table.copy()
            self.Q[idx].table = source_table.copy()

        # One independent copy of the parent's learning rate(s) per ensemble.
        self.alpha = [deepcopy(self.alpha) for _ in range(2)]
Пример #6
0
    def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic',
                 update_type='weighted', q_min=0, q_max=1):
        """
        Build an ensemble of constant-initialised tables and its learning
        rates.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent constructor; its ``size``
                attribute dimensions the tables;
            learning_rate: forwarded to the parent constructor;
            n_approximators: number of tables in the ensemble;
            update_mode: stored in ``self._update_mode``;
            update_type: stored in ``self._update_type``;
            q_min: first of the evenly spaced initial values;
            q_max: last of the evenly spaced initial values.

        """
        self._n_approximators = n_approximators
        self._update_mode = update_mode
        self._update_type = update_type
        self.Q = EnsembleTable(self._n_approximators, mdp_info.size)

        # Table i starts filled with the i-th of n_approximators evenly
        # spaced values in [q_min, q_max].
        init_values = np.linspace(q_min, q_max, n_approximators)
        for i, table in enumerate(self.Q.model):
            table.table = np.tile([init_values[i]], self.Q[i].shape)

        super(Particle, self).__init__(self.Q, policy, mdp_info,
                                       learning_rate)

        # One *independent* copy per approximator. ``[copy] * n`` would put
        # the same object in every slot, so any state the learning-rate
        # object keeps would be shared by all approximators.
        self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]
Пример #7
0
    def __init__(self, policy, mdp_info, learning_rate, n_approximators=10,
                 mu=0., sigma=1., p=2 / 3., cross_update=False):
        """
        Build an ensemble of randomly initialised tables and its learning
        rates.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent constructor; its ``size``
                attribute dimensions the tables;
            learning_rate: forwarded to the parent constructor;
            n_approximators: number of tables in the ensemble;
            mu: offset added to the standard-normal initial values;
            sigma: scale applied to the standard-normal initial values;
            p: success probability of the Bernoulli draws that form
                ``self._mask``;
            cross_update: stored in ``self._cross_update``.

        """
        self._n_approximators = n_approximators
        self._mu = mu
        self._sigma = sigma
        self._p = p
        self._cross_update = cross_update
        # One 0/1 draw per approximator.
        self._mask = np.random.binomial(1, self._p, self._n_approximators)
        self.Q = EnsembleTable(self._n_approximators, mdp_info.size)
        for i, table in enumerate(self.Q.model):
            table.table = np.random.randn(
                *self.Q[i].shape) * self._sigma + self._mu

        super(Bootstrapped, self).__init__(self.Q, policy, mdp_info,
                                           learning_rate)

        # One *independent* copy per approximator. ``[copy] * n`` would put
        # the same object in every slot, so any state the learning-rate
        # object keeps would be shared by all approximators.
        self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]
Пример #8
0
    def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic',
                 update_type='weighted', q_min=0, q_max=1, init_values=None, delta =0.1):
        """
        Build an ensemble of constant-initialised tables, the quantile
        levels attached to them and the learning rates.

        Args:
            policy: forwarded to the parent constructor;
            mdp_info: forwarded to the parent constructor; its ``size``
                attribute dimensions the tables;
            learning_rate: forwarded to the parent constructor;
            n_approximators: number of tables in the ensemble;
            update_mode: stored in ``self._update_mode``;
            update_type: stored in ``self._update_type``;
            q_min: first of the evenly spaced default initial values;
            q_max: last of the evenly spaced default initial values;
            init_values: per-table constants; when None,
                ``n_approximators`` evenly spaced values between ``q_min``
                and ``q_max`` are used;
            delta: stored in ``self.delta``; ``self.delta_index`` is the
                index of the first quantile level that is at least
                ``1 - delta``.

        """
        self._n_approximators = n_approximators
        self._update_mode = update_mode
        self._update_type = update_type
        self.delta = delta
        # Evenly spaced levels from 0 to 1 inclusive.
        # NOTE(review): n_approximators == 1 divides by zero here.
        self.quantiles = [i * 1. / (n_approximators - 1) for i in range(n_approximators)]
        # NOTE(review): delta_index is left unset if no level reaches
        # 1 - delta (only possible for delta < 0, since the last level is 1).
        for idx, level in enumerate(self.quantiles):
            if level >= 1 - delta:
                self.delta_index = idx
                break

        self.Q = EnsembleTable(self._n_approximators, mdp_info.size)
        if init_values is None:
            init_values = np.linspace(q_min, q_max, n_approximators)
        for i, table in enumerate(self.Q.model):
            table.table = np.tile([init_values[i]], self.Q[i].shape)

        super(Particle, self).__init__(self.Q, policy, mdp_info,
                                       learning_rate)

        # One *independent* copy per approximator. ``[copy] * n`` would put
        # the same object in every slot, so any state the learning-rate
        # object keeps would be shared by all approximators.
        self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]