def __init__(self, policy, mdp_info, learning_rate, sigma_learning_rate=None, update_mode='deterministic', update_type='weighted', init_values=(0., 500.), delta=0.1, minimize_wasserstein=True):
    """Gaussian double Q-learning constructor.

    Builds two independent two-member ensembles (mean table and sigma
    table per estimator) on top of the parent's shared ``self.Q`` view,
    initializes all tables to ``init_values``, and duplicates the
    learning-rate structure so each estimator decays independently.
    """
    super(GaussianDoubleQLearning, self).__init__(
        policy, mdp_info, learning_rate, sigma_learning_rate,
        update_mode, update_type, init_values, delta,
        minimize_wasserstein)

    # One ensemble per estimator of the double-learning scheme.
    self.Qs = [EnsembleTable(2, mdp_info.size),
               EnsembleTable(2, mdp_info.size)]

    # First estimator: constant init (mean, sigma) from init_values.
    for idx in range(len(self.Qs[0])):
        self.Qs[0][idx].table = np.tile([init_values[idx]],
                                        self.Q[idx].shape)

    # Second estimator and the merged view start as identical copies.
    for idx in range(len(self.Qs[1])):
        self.Qs[1][idx].table = self.Qs[0][idx].table.copy()
        self.Q[idx].table = self.Qs[0][idx].table.copy()

    # Independent learning-rate structure per estimator.
    self.alpha = [deepcopy(self.alpha), deepcopy(self.alpha)]
def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, mu=0., sigma=1., p=1., cross_update=False):
    """Bootstrapped double Q-learning constructor.

    Creates two bootstrapped ensembles of ``n_approximators`` tables,
    draws one Gaussian random initialization N(mu, sigma^2) and shares
    it (by copy) across both estimators and the parent's merged view,
    then duplicates the learning-rate structure per estimator.
    """
    super(BootstrappedDoubleQLearning, self).__init__(
        policy, mdp_info, learning_rate, n_approximators, mu, sigma,
        p, cross_update)

    self.Qs = [EnsembleTable(n_approximators, mdp_info.size),
               EnsembleTable(n_approximators, mdp_info.size)]

    # Random Gaussian init for the first estimator's ensemble.
    for k in range(len(self.Qs[0])):
        self.Qs[0][k].table = (np.random.randn(*self.Qs[0][k].shape)
                               * self._sigma + self._mu)

    # Second estimator and merged view start as identical copies,
    # so divergence comes only from subsequent updates.
    for k in range(len(self.Qs[1])):
        self.Qs[1][k].table = self.Qs[0][k].table.copy()
        self.Q[k].table = self.Qs[0][k].table.copy()

    # Independent learning-rate structure per estimator.
    self.alpha = [deepcopy(self.alpha), deepcopy(self.alpha)]
def __init__(self, policy, mdp_info, learning_rate, sigma_learning_rate=None, sigma_1_learning_rate=None, update_mode='deterministic', update_type='weighted', init_values=(0., 0., 500.), delta=0.1, q_max=None, minimize_wasserstein=True, clip_variance=True):
    """Gaussian (Wasserstein) Q-learning constructor.

    Models each Q(s, a) as a Gaussian whose parameters (mean and one or
    two sigma components, per ``len(init_values)``) live in an
    ``EnsembleTable``. The initial behavior policy is the uniform
    distribution over actions maximizing the clipped upper confidence
    bound mean + z_{1-delta} * sigma.

    Fixes vs. original: ``init_values`` default is now a tuple (the
    mutable-list default was a shared-object pitfall; it is only read,
    never mutated, so behavior is unchanged), and the local policy
    matrix no longer shadows the ``policy`` parameter.

    NOTE(review): ``sigma_1_learning_rate`` is accepted but never used
    in this body — possibly intended for ``self.alpha[1]``; confirm
    against callers before removing or wiring it in.
    """
    self._update_mode = update_mode
    self._update_type = update_type
    self.delta = delta
    # One "approximator" per Gaussian parameter: 2 = (mean, sigma),
    # 3 = (mean, sigma_1, sigma_2).
    self.n_approximators = len(init_values)

    self.Q = EnsembleTable(len(init_values), mdp_info.size)

    if q_max is None:
        q_max = 1 / (1 - mdp_info.gamma)
        # NOTE(review): reconstructed nesting — in the 3-parameter case
        # the mean init value is used as the default bound cap; an
        # explicitly passed q_max is always respected.
        if self.n_approximators == 3:
            q_max = init_values[0]
    self.sigma_b = init_values[-1]
    self.q_max = q_max

    # Constant initialization of each parameter table.
    for i in range(len(self.Q.model)):
        self.Q.model[i].table = np.tile([init_values[i]],
                                        self.Q[i].shape)

    super(Gaussian, self).__init__(self.Q, policy, mdp_info,
                                   learning_rate)

    if sigma_learning_rate is None:
        sigma_learning_rate = deepcopy(learning_rate)
    # Independent learning rates: mean, first sigma, second sigma.
    self.alpha = [deepcopy(self.alpha), deepcopy(self.alpha),
                  deepcopy(sigma_learning_rate)]

    self.minimize_wasserstein = minimize_wasserstein

    # Build the initial optimistic policy: uniform over the actions
    # whose clipped upper bound is maximal in each state.
    # (Renamed from `policy`, which shadowed the parameter.)
    policy_matrix = np.zeros(self.mdp_info.size)
    # z-score of the (1 - delta) quantile of the standard normal.
    self.standard_bound = norm.ppf(1 - self.delta, loc=0, scale=1)
    for s in range(self.mdp_info.size[0]):
        if self.n_approximators == 3:
            means, sigmas1, sigmas2 = [x[[s]] for x in self.Q.model]
            sigmas = sigmas1 + sigmas2
        else:
            means, sigmas = [x[[s]] for x in self.Q.model]
        bounds = sigmas * self.standard_bound + means
        bounds = np.clip(bounds, None, self.q_max)
        actions = np.argwhere(bounds == np.max(bounds)).ravel()
        n = len(actions)
        for a in actions:
            policy_matrix[s, a] = 1. / n
    self.policy_matrix = policy_matrix

    self.last_update = (0, 0)
    self.clip_variance = clip_variance
    if self.clip_variance:
        print("***CLIPPING VARIANCE***")
def __init__(self, policy, mdp_info, learning_rate):
    """Double Q-learning constructor.

    Wraps two tabular estimators in an ensemble and gives each one its
    own independent copy of the learning-rate structure.
    """
    self.Q = EnsembleTable(2, mdp_info.size)

    super().__init__(self.Q, policy, mdp_info, learning_rate)

    # One learning rate per estimator, copied so they decay separately.
    self.alpha = [deepcopy(self.alpha) for _ in range(2)]

    # Sanity check on the ensemble size the algorithm relies on.
    assert len(self.Q) == 2, \
        'The regressor ensemble must have exactly 2 models.'
def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic', update_type='weighted', q_min=0, q_max=1):
    """Particle double Q-learning constructor.

    Builds two particle ensembles whose tables are initialized to
    evenly spaced values spanning [q_min, q_max]; both estimators and
    the parent's merged view start identical. The learning-rate
    structure is duplicated per estimator.
    """
    super(ParticleDoubleQLearning, self).__init__(
        policy, mdp_info, learning_rate, n_approximators,
        update_mode, update_type, q_min, q_max)

    self.Qs = [EnsembleTable(n_approximators, mdp_info.size),
               EnsembleTable(n_approximators, mdp_info.size)]

    # Evenly spaced particle values across the Q-value range.
    particle_values = np.linspace(q_min, q_max, n_approximators)

    for j in range(len(self.Qs[0])):
        self.Qs[0][j].table = np.tile([particle_values[j]],
                                      self.Q[j].shape)

    for j in range(len(self.Qs[1])):
        self.Qs[1][j].table = self.Qs[0][j].table.copy()
        self.Q[j].table = self.Qs[0][j].table.copy()

    # Independent learning-rate structure per estimator.
    self.alpha = [deepcopy(self.alpha), deepcopy(self.alpha)]
def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic', update_type='weighted', q_min=0, q_max=1):
    """Particle Q-learning constructor.

    Represents the Q-value distribution with ``n_approximators``
    particles per (s, a), initialized to evenly spaced values spanning
    [q_min, q_max].

    Bug fix: the original ``[deepcopy(self.alpha)] * n_approximators``
    evaluates ``deepcopy`` once and replicates the SAME object n times,
    so all particles shared one mutable learning-rate structure (the
    double-learning siblings in this file build independent copies).
    A comprehension gives each particle its own copy.
    """
    self._n_approximators = n_approximators
    self._update_mode = update_mode
    self._update_type = update_type

    self.Q = EnsembleTable(self._n_approximators, mdp_info.size)

    # Evenly spaced particle values across the Q-value range.
    init_values = np.linspace(q_min, q_max, n_approximators)
    for i in range(len(self.Q.model)):
        self.Q.model[i].table = np.tile([init_values[i]],
                                        self.Q[i].shape)

    super(Particle, self).__init__(self.Q, policy, mdp_info,
                                   learning_rate)

    # One INDEPENDENT learning-rate copy per particle (was aliased).
    self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]
def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, mu=0., sigma=1., p=2 / 3., cross_update=False):
    """Bootstrapped Q-learning constructor.

    Maintains ``n_approximators`` Q-tables, each initialized with i.i.d.
    Gaussian noise N(mu, sigma^2); ``p`` is the Bernoulli rate of the
    bootstrap update mask.

    Bug fix: the original ``[deepcopy(self.alpha)] * n_approximators``
    evaluates ``deepcopy`` once and replicates the SAME object n times,
    so every head shared one mutable learning-rate structure (the
    double-learning siblings in this file build independent copies).
    A comprehension gives each head its own copy.
    """
    self._n_approximators = n_approximators
    self._mu = mu
    self._sigma = sigma
    self._p = p
    self._cross_update = cross_update
    # Initial bootstrap mask: one Bernoulli(p) draw per head.
    self._mask = np.random.binomial(1, self._p, self._n_approximators)

    self.Q = EnsembleTable(self._n_approximators, mdp_info.size)

    # Random Gaussian initialization of every head's table.
    for i in range(len(self.Q.model)):
        self.Q.model[i].table = (np.random.randn(*self.Q[i].shape)
                                 * self._sigma + self._mu)

    super(Bootstrapped, self).__init__(self.Q, policy, mdp_info,
                                       learning_rate)

    # One INDEPENDENT learning-rate copy per head (was aliased).
    self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]
def __init__(self, policy, mdp_info, learning_rate, n_approximators=10, update_mode='deterministic', update_type='weighted', q_min=0, q_max=1, init_values=None, delta=0.1):
    """Particle (quantile) Q-learning constructor.

    Represents the Q-value distribution with ``n_approximators``
    particles placed at evenly spaced quantile levels;
    ``self.delta_index`` is the index of the first quantile level at or
    above 1 - delta, used for confidence-bound action selection. Tables
    are initialized to ``init_values`` (default: evenly spaced over
    [q_min, q_max]).

    Bug fix: the original ``[deepcopy(self.alpha)] * n_approximators``
    evaluates ``deepcopy`` once and replicates the SAME object n times,
    so all particles shared one mutable learning-rate structure (the
    double-learning siblings in this file build independent copies).
    A comprehension gives each particle its own copy.
    """
    self._n_approximators = n_approximators
    self._update_mode = update_mode
    self._update_type = update_type
    self.delta = delta

    # Quantile levels 0, 1/(n-1), ..., 1 — assumes n_approximators >= 2.
    self.quantiles = [i * 1. / (n_approximators - 1)
                      for i in range(n_approximators)]
    # Index of the first quantile level >= 1 - delta (always found for
    # delta > 0, since the last level is 1).
    for p in range(n_approximators):
        if self.quantiles[p] >= 1 - delta:
            self.delta_index = p
            break

    self.Q = EnsembleTable(self._n_approximators, mdp_info.size)

    if init_values is None:
        init_values = np.linspace(q_min, q_max, n_approximators)
    for i in range(len(self.Q.model)):
        self.Q.model[i].table = np.tile([init_values[i]],
                                        self.Q[i].shape)

    super(Particle, self).__init__(self.Q, policy, mdp_info,
                                   learning_rate)

    # One INDEPENDENT learning-rate copy per particle (was aliased).
    self.alpha = [deepcopy(self.alpha) for _ in range(n_approximators)]