def loss(self, model_out, batch):
    target, sentence_scores = batch
    sentence_pred = model_out['scores']
    sentence_scores = sentence_scores.to(self.device)
    if not self.sentence_sigma:
        loss = self.mse_loss(sentence_pred, sentence_scores)
    else:
        mean = model_out['sent_mu']
        sigma = model_out['sent_sigma']
        # Compute log-likelihood of x given mu, sigma
        normal = Normal(mean, sigma)
        # Renormalize on [0, 1] for truncated Gaussian
        partition_function = (normal.cdf(1) - normal.cdf(0)).detach()
        nll = partition_function.log() - normal.log_prob(sentence_scores)
        loss = nll.sum()
    loss_dict = OrderedDict()
    loss_dict['loss'] = loss
    return loss_dict
def forward(self, x):
    yhat = MADE.forward(self, x)
    # Even output columns parameterize x0, odd columns parameterize x1.
    x0_params = yhat[:, ::2]
    x1_params = yhat[:, 1::2]

    # Mixture-of-Gaussians parameters for x0.
    mu_x0 = x0_params[:, ::3]
    sigma_x0 = x0_params[:, 1::3]
    sigma_x0 = nn.ReLU()(sigma_x0) + torch.ones(sigma_x0.shape) * 0.1
    pi_x0: torch.Tensor = x0_params[:, 2::3]
    pi_x0 = pi_x0.softmax(dim=-1)
    x0_dist = Normal(mu_x0, sigma_x0)
    x0 = x[:, 0].float()
    df_x0 = pi_x0 * torch.exp(x0_dist.log_prob(x0[:, None]))
    z0 = (pi_x0 * x0_dist.cdf(x0[:, None])).sum(dim=-1)
    jac0 = df_x0.sum(dim=-1)

    # Mixture-of-Gaussians parameters for x1.
    mu_x1 = x1_params[:, ::3]
    sigma_x1: torch.Tensor = x1_params[:, 1::3]
    sigma_x1 = nn.ReLU()(sigma_x1) + torch.ones(sigma_x1.shape) * 0.1
    pi_x1 = x1_params[:, 2::3]
    pi_x1 = pi_x1.softmax(dim=-1)
    x1_dist = Normal(mu_x1, sigma_x1)
    x1 = x[:, 1].float()
    df_x1 = pi_x1 * torch.exp(x1_dist.log_prob(x1[:, None]))
    z1 = (pi_x1 * x1_dist.cdf(x1[:, None])).sum(dim=-1)
    jac1 = df_x1.sum(dim=-1)

    ll = jac0.log() + jac1.log()
    z = torch.cat([z0[:, None], z1[:, None]], dim=-1)
    return yhat, ll, z
def expected_log_cdf(i):
    if i < self.n_speakers:
        qd1 = torch.zeros_like(logspec0)
        for c in range(self.gmm['n_components']):
            pd_x = Normal(self.gmm['means'][c], self.gmm['stds'][c])
            qd1 += self.qz[i, c][:, None] * (pd_x.cdf(logspec0) + 1e-6).log()
    else:
        pd_x = self.noise_model
        qd1 = (pd_x.cdf(logspec0) + 1e-6).log()
    return qd1
def update_qZ(self, logspec0):
    '''Updates q(Z) approximate posterior.

    Args:
        logspec0 (torch.Tensor): Spectral features of shape (T, F)
    '''
    mu_vs, logvar_vs, mu_us, logvar_us = self.qz
    param_to_optimize = (*self.qz,)
    optim_z = optim.SGD(param_to_optimize, lr=self.qz_learn_rate)
    n_t = logspec0.shape[0]
    for i_iter in range(self.qz_n_updates):
        optim_z.zero_grad()
        kls, explogliks = [], []
        for _ in range(self.n_z_samples):
            # sample q(Z1), q(Z2)
            u_samp = sample_normal(mu_us, logvar_us)
            v_samp = sample_normal(mu_vs, logvar_vs)
            # forward Z1, Z2 through VAE decoder
            mu_x, logvar_x = self.vae.emission(
                v_samp, u_samp, [n_t] * self.n_speakers)
            mu_x, logvar_x = mu_x[:, :n_t, :], logvar_x[:, :n_t, :]
            pd_x = Normal(mu_x, torch.exp(0.5 * logvar_x))
            # compute loss Eq.(21)
            kluv, _ = self.vae.kl_divergence_expected_speaker(
                ((mu_vs, logvar_vs), (mu_us, logvar_us), u_samp, v_samp),
                torch.tensor([n_t]))
            kls.append(kluv.sum() / self.n_z_samples)
            exploglik = self.qd[:self.n_speakers] * torch.clamp(
                pd_x.log_prob(logspec0), -14, 100)
            exploglik += (1 - self.qd[:self.n_speakers]) * (
                pd_x.cdf(logspec0) + 1e-6).log()
            explogliks.append(exploglik.sum() / self.n_z_samples)
        pd_x = self.noise_model
        exploglik = self.qd[-1] * torch.clamp(pd_x.log_prob(logspec0), -14, 100)
        exploglik += (1 - self.qd[-1]) * (pd_x.cdf(logspec0) + 1e-6).log()
        explogliks.append(exploglik.sum())
        objf = -(torch.sum(torch.stack(explogliks))
                 - self.kl_weight * torch.sum(torch.stack(kls)))
        objf.backward()
        for p in param_to_optimize:
            nn.utils.clip_grad_value_(p, 5)
        optim_z.step()
    self.qz = (mu_vs, logvar_vs, mu_us, logvar_us)
def sentence_loss(self, model_out, batch):
    """Compute Sentence score loss."""
    sentence_pred = model_out[const.SENTENCE_SCORES]
    sentence_scores = batch.sentence_scores
    if not self.sentence_sigma:
        return self.mse_loss(sentence_pred, sentence_scores)
    else:
        sigma = model_out[const.SENT_SIGMA]
        mean = model_out['SENT_MU']
        # Compute log-likelihood of x given mu, sigma
        normal = Normal(mean, sigma)
        # Renormalize on [0, 1] for truncated Gaussian
        partition_function = (normal.cdf(1) - normal.cdf(0)).detach()
        nll = partition_function.log() - normal.log_prob(sentence_scores)
        return nll.sum()
def forward(self, candidate_set):
    self.gp_model.eval()
    self.gp_model.likelihood.eval()
    pred = self.gp_model.likelihood(self.gp_model(candidate_set))
    mu = pred.mean().detach()
    sigma = pred.std().detach()
    # K samples of the posterior function f
    f_samples = pred.sample(self.K)
    # K samples of y_star
    ys = f_samples.max(dim=0)[0]
    ysArray = ys.unsqueeze(0).expand(candidate_set.shape[0], self.K)
    # compute gamma_y_star
    muArray = mu.unsqueeze(1).expand(candidate_set.shape[0], self.K)
    sigmaArray = sigma.unsqueeze(1).expand(candidate_set.shape[0], self.K)
    gamma = (ysArray - muArray) / sigmaArray
    # Compute the acquisition function of MES.
    m = Normal(torch.Tensor([0.0]), torch.Tensor([1.0]))  # standard normal
    pdfgamma = torch.exp(m.log_prob(gamma))
    cdfgamma = m.cdf(gamma)
    mve = torch.mean(gamma * pdfgamma / (2 * cdfgamma) - torch.log(cdfgamma),
                     dim=1)
    return mve
class GaussianCopulaVariable(nn.Module):
    arg_constraints = {'loc': constraints.real, 'scale': constraints.positive}

    def __init__(self, loc, scale, covariance_matrix=None, validate_args=None):
        super(GaussianCopulaVariable, self).__init__()
        self.multinormal = MultivariateNormal(loc, covariance_matrix)
        self.normal = Normal(loc, scale)
        self.standard_normal = Normal(0, 1)
        self.loc = loc
        self.scale = scale
        self.covariance_matrix = covariance_matrix

    def forward(self, x):
        pass

    def sample(self):
        r'''Sample from a Gaussian copula:
        q ~ N(0, \Sigma)
        u = [cdf(q_1), ..., cdf(q_n)]^T
        '''
        q = self.multinormal.rsample()
        u = torch.stack([self.standard_normal.cdf(q_i / s_i)
                         for q_i, s_i in zip(q, self.scale)]).squeeze()
        return u
def update_qZ(self, logspec0):
    '''Updates q(Z) approximate posterior.

    Args:
        logspec0 (torch.Tensor): Spectral features of shape (T, F)
    '''
    n_t, n_f = logspec0.shape
    self.qz = torch.zeros(self.n_speakers, self.gmm['n_components'], n_t)
    for i in range(self.n_speakers):
        for c in range(self.gmm['n_components']):
            pd_x1 = Normal(self.gmm['means'][c], self.gmm['stds'][c])
            self.qz[i, c] = (self.qd[i] * torch.clamp(
                pd_x1.log_prob(logspec0), -14, 100)).sum(dim=1)
            self.qz[i, c] += ((1 - self.qd[i]) * (
                pd_x1.cdf(logspec0) + 1e-6).log()).sum(dim=1)
            self.qz[i, c] += self.gmm['weights'][c]
    # Normalize over components in the log domain, then clamp for stability.
    self.qz = self.qz - self.qz.max(dim=1, keepdim=True)[0]
    self.qz = self.qz.exp()
    self.qz = self.qz / (self.qz.sum(dim=1, keepdim=True) + 1e-6)
    self.qz = self.qz.clamp(1e-6, 1 - 1e-6)
def log_likelihood():
    lh = 0
    marginCDF = torch.zeros(n, d, device=X.device)
    if marginals == 'Normal':
        for j in range(d):
            norm = Normal(hyperOptimizeParams[j]['loc'],
                          hyperOptimizeParams[j]['scale'])
            marginCDF[:, j] = norm.cdf(X[:, j])
            # Nudge CDF values that saturated at exactly 1.0 back into the
            # open interval so the copula density stays finite.
            saturated = marginCDF[:, j] == 1.0
            marginCDF[saturated, j] -= 1e-2
    # First term: the copula's density
    for i in range(n):
        pdf_val = self.pdf_param(marginCDF[i, :])
        lh += torch.log(pdf_val if pdf_val.data != 0.0
                        else torch.tensor([[1e-5]], device=X.device))
    # Second term: sum of the marginals' log-densities
    for j in range(d):
        norm = Normal(hyperOptimizeParams[j]['loc'],
                      hyperOptimizeParams[j]['scale'])
        lh += norm.log_prob(X[:, j]).sum()
    return lh
def _fit_isotonic(model, train_loader):
    t_start = perf_counter()
    means, stds, ys = model.mc_prediction_loader(train_loader)
    N = means.shape[0]
    dist = Normal(means, stds)
    cdf = dist.cdf(ys)
    sorted_cdf, ind = cdf.sort()  # [N]
    y = torch.arange(1.0, N + 1) / N  # [N]
    ir = IsotonicRegression(out_of_bounds='clip')
    x = sorted_cdf.cpu().numpy()  # [N]
    y = y.numpy()  # [N]
    # Prepend (0, 0) so the fitted map passes through the origin.
    x_app = np.insert(x, 0, 0.0)
    y_app = np.insert(y, 0, 0.0)
    y_ = ir.fit_transform(x_app, y_app)  # [N]
    delta = _delta(means, stds, ys)
    # Synchronize CUDA calls, then stop and measure the time taken for the
    # post-processing step.
    torch.cuda.synchronize()
    t_stop = perf_counter()
    iso_time = torch.tensor(t_stop - t_start)
    return ir, delta, sorted_cdf, iso_time
def get_hazard_survival(self, model, x, t):
    """Compute the hazard and survival functions."""
    # Computing the score
    score = model(x).reshape(-1, 1)
    # Extracting beta
    beta = list(model.parameters())[-1]
    # Initializing the standard Normal distribution
    from torch.distributions.normal import Normal
    m = Normal(torch.tensor([0.0]), torch.tensor([1.0]))
    # Computing hazard and Survival
    hazard = (torch.log(t) - torch.log(score)) / (np.sqrt(2) * beta)
    Survival = 1. - m.cdf(
        (torch.log(t) - torch.log(score)) / (np.sqrt(2) * beta))
    hazard = hazard * (torch.log(t) - torch.log(score)) / (np.sqrt(2) * beta)
    hazard = torch.exp(-hazard / 2.)
    hazard = hazard / (np.sqrt(2 * np.pi) * Survival * (t * beta))
    # Floor both quantities to avoid division by zero downstream.
    hazard = torch.max(hazard, torch.FloatTensor([1e-6]))
    Survival = torch.max(Survival, torch.FloatTensor([1e-6]))
    return hazard, Survival
def forward(self, x):
    x = x.view(-1, 1)
    weights = self.weight_logits.softmax(dim=0).view(1, -1)
    distribution = Normal(self.mus, self.log_sigmas.exp())
    # The mixture CDF is the flow output; the mixture density is its
    # derivative, used for the change-of-variables term.
    z = (distribution.cdf(x) * weights).sum(dim=1)
    dz_by_dx = (distribution.log_prob(x).exp() * weights).sum(dim=1)
    return z, dz_by_dx
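# Not from the source: a minimal, self-contained sketch of the mixture-CDF
# flow above, with made-up parameters (mus, log_sigmas, uniform weights) in
# place of the module's learned buffers.
import torch
from torch.distributions import Normal

mus = torch.tensor([-1.0, 0.0, 1.0])
log_sigmas = torch.zeros(3)
weights = torch.softmax(torch.zeros(3), dim=0).view(1, -1)

x = torch.linspace(-2, 2, 5).view(-1, 1)
distribution = Normal(mus, log_sigmas.exp())
z = (distribution.cdf(x) * weights).sum(dim=1)  # flow output, lies in (0, 1)
dz_by_dx = (distribution.log_prob(x).exp() * weights).sum(dim=1)  # mixture pdf
print(z, dz_by_dx)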
def forward(self, X: Tensor) -> Tensor:
    r"""Evaluate Constrained Expected Improvement on the candidate set X.

    Args:
        X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
            points each.

    Returns:
        A `(b)`-dim Tensor of Expected Improvement values at the given
        design points `X`.
    """
    if X.dim() == 1:
        X = X.view(1, self.dim)
    means, sigmas = self._get_posterior_reimplemented(X)  # (b) x 1
    mean_obj = means[..., [self.objective_index]]
    sigma_obj = sigmas[..., [self.objective_index]]
    # Probability of feasibility:
    prob_feas = self._compute_prob_feas(X=X, means=means, sigmas=sigmas)
    if self.only_prob:
        ei_times_prob = prob_feas  # Use only the probability of feasibility
    else:
        u = (mean_obj - self.best_f.expand_as(mean_obj)) / sigma_obj
        if not self.maximize:
            u = -u
        normal = Normal(
            torch.zeros(1, device=u.device, dtype=u.dtype),
            torch.ones(1, device=u.device, dtype=u.dtype),
        )
        ei_pdf = torch.exp(normal.log_prob(u))  # (b) x 1
        ei_cdf = normal.cdf(u)
        ei = sigma_obj * (ei_pdf + u * ei_cdf)
        ei_times_prob = ei.mul(prob_feas)
    val = ei_times_prob.squeeze(dim=-1)
    if val.dim() == 1 and len(val) == 1 or val.dim() == 0:
        val = val.item()
    return val
class SplicedNormCurve:
    def __init__(self, mean1, mean2, intervalWidth):
        self.normal1 = Normal(mean1, 1)
        self.normal2 = Normal(mean2, 1)
        self.splicePoint = (mean1 + mean2) / 2.0
        self.addCdf1 = 1 / 2  # equals cdf1(splicePoint) / C
        self.normConstant = 2 * self.normal1.cdf(self.splicePoint)
        self.substractCdf2 = self.normal2.cdf(
            self.splicePoint) / self.normConstant
        self.interval = torch.distributions.Uniform(mean1 - intervalWidth,
                                                    mean2 + intervalWidth)
        self.vectorizeCurve = np.vectorize(self.point)
        self.vectorizeCDF = np.vectorize(lambda x: (x, self.cdfCurve(x)))

    def curve(self, x):
        with torch.no_grad():
            return self.prob(
                x, self.normal1 if x < self.splicePoint else self.normal2)

    def cdfCurve(self, x):
        with torch.no_grad():
            if x < self.splicePoint:
                return self.normal1.cdf(x) / self.normConstant
            return (self.normal2.cdf(x) / self.normConstant
                    + self.addCdf1 - self.substractCdf2)

    def prob(self, x, distr):
        return torch.exp(distr.log_prob(x)) / self.normConstant

    def point(self, x):
        return x, self.curve(x)

    def sample(self, sampleSize):
        # It might be nice to pass splicePoint as the initial guess.
        return sampleCdf(self.cdfCurve, sampleSize)

    def sampleCurve(self, sampleSize):
        sortedArguments = torch.sort(self.interval.sample((sampleSize,)))
        return self.vectorizeCurve(sortedArguments[0])

    def sampleCDF(self, sampleSize):
        sortedArguments = torch.sort(self.interval.sample((sampleSize,)))
        return self.vectorizeCDF(sortedArguments[0])
def setup_class(cls):
    cls.data_info = dict(mean='mean_pred', logvar='logvar_pred', x='real_imgs')
    cls.tar_shape = [2, 2, 4, 4]
    cls.mean_pred = torch.zeros(cls.tar_shape)
    cls.logvar_pred = torch.zeros(cls.tar_shape)
    cls.real_imgs = torch.zeros(cls.tar_shape)
    cls.output_dict = dict(mean_pred=cls.mean_pred,
                           logvar_pred=cls.logvar_pred,
                           real_imgs=cls.real_imgs)
    norm_dist = Normal(0, 1)
    # Expected loss: log-probability mass of the discretization bin
    # [-1/255, 1/255] around zero under N(0, 1).
    cls.gt_loss = torch.log(
        norm_dist.cdf(torch.FloatTensor([1 / 255])) -
        norm_dist.cdf(torch.FloatTensor([-1 / 255])))
def outlier_test(self, y, mu, std):
    N = Normal(mu, std)
    if self.test == "z":
        # Values more than three standard deviations from the mean.
        z = np.abs((y - mu) / std)
        outlier = z[z > 3]
        return outlier
    else:
        # Upper-tail probability of each observation.
        prob = 1 - N.cdf(y)
        return prob
def forward(self, x, condition):
    x = x.view(-1, 1)
    mus, log_sigmas, weight_logits = torch.chunk(self.cdf(condition), 3, dim=1)
    weights = weight_logits.softmax(dim=1)
    distribution = Normal(mus, log_sigmas.exp())
    z = (distribution.cdf(x) * weights).sum(dim=1)
    dz_by_dx = (distribution.log_prob(x).exp() * weights).sum(dim=1)
    return z, dz_by_dx
def forward(self, x: Tensor) -> Tensor:
    if not x.numel() == 1:
        raise ValueError('PoI can only sample one value at a time.')
    mean = self.model.mean(x)
    cov = self.model.kernel(x, x)
    # Normal's scale parameter is a standard deviation, so take the square
    # root of the kernel variance.
    normal = Normal(mean, cov.sqrt())
    return normal.cdf(x)
def _prob_in_top_k(self, clean_values, noisy_values, noise_stddev,
                   noisy_top_values):
    """Helper function to NoisyTopKGating.

    Computes the probability that value is in top k, given different random
    noise. This gives us a way of backpropagating from a loss that balances
    the number of times each expert is in the top k experts per example.
    In the case of no noise, pass in None for noise_stddev, and the result
    will not be differentiable.

    Args:
        clean_values: a `Tensor` of shape [batch, n].
        noisy_values: a `Tensor` of shape [batch, n]. Equal to clean values
            plus normally distributed noise with standard deviation
            noise_stddev.
        noise_stddev: a `Tensor` of shape [batch, n], or None
        noisy_top_values: a `Tensor` of shape [batch, m]. "values" output of
            tf.top_k(noisy_top_values, m). m >= k+1

    Returns:
        a `Tensor` of shape [batch, n].
    """
    batch = clean_values.size(0)
    m = noisy_top_values.size(1)
    top_values_flat = noisy_top_values.flatten()
    threshold_positions_if_in = (
        torch.arange(batch, device=clean_values.device) * m + self.top_k)
    threshold_if_in = torch.unsqueeze(
        torch.gather(top_values_flat, 0, threshold_positions_if_in), 1)
    # Is each value currently in the top k?
    is_in = torch.gt(noisy_values, threshold_if_in)
    threshold_positions_if_out = threshold_positions_if_in - 1
    threshold_if_out = torch.unsqueeze(
        torch.gather(top_values_flat, 0, threshold_positions_if_out), 1)
    normal = Normal(
        torch.tensor([0.0], device=clean_values.device),
        torch.tensor([1.0], device=clean_values.device),
    )
    prob_if_in = normal.cdf((clean_values - threshold_if_in) / noise_stddev)
    prob_if_out = normal.cdf((clean_values - threshold_if_out) / noise_stddev)
    prob = torch.where(is_in, prob_if_in, prob_if_out)
    return prob
def predict_sentence(self, sentence_input):
    """Compute Sentence Score predictions."""
    outputs = OrderedDict()
    sentence_scores = self.sentence_pred(sentence_input).squeeze()
    outputs[const.SENTENCE_SCORES] = sentence_scores
    if self.sentence_sigma:
        # Predict truncated Gaussian on [0, 1]
        sigma = self.sentence_sigma(sentence_input).squeeze()
        outputs[const.SENT_SIGMA] = sigma
        outputs['SENT_MU'] = outputs[const.SENTENCE_SCORES]
        mean = outputs['SENT_MU'].clone().detach()
        # Compute log-likelihood of x given mu, sigma
        normal = Normal(mean, sigma)
        # Renormalize on [0, 1] for truncated Gaussian
        partition_function = (normal.cdf(1) - normal.cdf(0)).detach()
        # Mean of the Gaussian truncated to [0, 1]
        outputs[const.SENTENCE_SCORES] = mean + (
            (sigma ** 2
             * (normal.log_prob(0).exp() - normal.log_prob(1).exp()))
            / partition_function)
    return outputs
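# Not from the source: a quick numeric check that the truncated-Gaussian mean
# used above, mu + sigma^2 * (pdf(0) - pdf(1)) / Z, matches rejection
# sampling on [0, 1]. The parameter values are arbitrary.
import torch
from torch.distributions import Normal

mu, sigma = torch.tensor(0.4), torch.tensor(0.3)
normal = Normal(mu, sigma)
Z = normal.cdf(torch.tensor(1.0)) - normal.cdf(torch.tensor(0.0))
analytic = mu + sigma ** 2 * (
    normal.log_prob(torch.tensor(0.0)).exp()
    - normal.log_prob(torch.tensor(1.0)).exp()) / Z

samples = normal.sample((200000,))
empirical = samples[(samples >= 0) & (samples <= 1)].mean()
print(analytic.item(), empirical.item())  # should agree to ~1e-3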
def forward(self, x: Tensor) -> Tensor:
    best_f = torch.max(self.model.y).to(x)
    posterior = self.model.posterior(x)
    mean = posterior.mean
    sigma = posterior.variance.sqrt()
    u = (mean - best_f.expand_as(mean)) / sigma
    norm = Normal(torch.zeros_like(u), torch.ones_like(u))
    return norm.cdf(u)
import numpy as np
import torch
from scipy import special
from torch.distributions import Normal


def test():
    # `erfcx` is the implementation under test, defined elsewhere.
    relative_error = 0
    for i in range(100):
        x = -1 + i * (10 - (-1)) / 100
        my_erfcx = erfcx(torch.FloatTensor([x]))
        relative_error = relative_error + np.abs(
            my_erfcx.item() - special.erfcx(x)) / special.erfcx(x)
    average_error = relative_error / 100
    print(average_error)
    normal = Normal(loc=torch.Tensor([0.0]), scale=torch.Tensor([1.0]))
    # P(X <= 1.6449) for a standard normal; should print roughly 0.95.
    print(normal.cdf(1.6449))
    print(normal.icdf(torch.Tensor([0.95])))
def expected_improvement(mean, var, reference):
    """Expected improvement for minimization problems.

    On graphs we do not use gradient-based optimization, so backward does
    not have to be implemented.

    :param mean: posterior mean at the candidate points
    :param var: posterior variance at the candidate points
    :param reference: incumbent (current best) value
    :return: expected improvement, clamped at zero
    """
    predictive_normal = Normal(mean.new_zeros(mean.size()),
                               mean.new_ones(mean.size()))
    std = torch.sqrt(var)
    standardized = (-mean + reference) / std
    return (std * torch.exp(predictive_normal.log_prob(standardized))
            + (-mean + reference) * predictive_normal.cdf(standardized)
            ).clamp(min=0)
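# Not from the source: a toy call showing the tensors expected_improvement
# expects; the values are arbitrary.
import torch

mean = torch.tensor([0.2, -0.1, 0.5])
var = torch.tensor([0.04, 0.09, 0.01])
reference = 0.0  # incumbent value for minimization
print(expected_improvement(mean, var, reference))  # one EI value per point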
def forward(self, input_x):
    if self.unit_test_mode:
        seed = 979
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    mean_out = input_x.mm(self.weight.t())
    mean_out += self.bias.unsqueeze(0).expand_as(mean_out)
    tmp = self.keep_prob * (1 - self.keep_prob) * input_x ** 2
    variance_out = tmp.mm(self.weight.t() ** 2)
    # Gaussian ReLU moment: E[max(0, X)] = mu * Phi(mu / sigma)
    # + sigma * phi(mu / sigma), so standardize by the std, not the variance.
    r = mean_out / torch.sqrt(variance_out)
    dist = Normal(torch.zeros_like(mean_out), torch.ones_like(mean_out))
    mean_out = dist.cdf(r) * mean_out + \
        torch.sqrt(variance_out) * torch.exp(dist.log_prob(r))
    return mean_out
def forward(self, input_x):
    if self.unit_test_mode:
        seed = 979
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    mean_out = input_x.mm(self.weight.t())
    mean_out += self.bias.unsqueeze(0).expand_as(mean_out)
    tmp = input_x.pow(2).mul((1 - self.keep_prob) / self.keep_prob)
    variance_out = tmp.mm(torch.abs(self.weight.t()) ** self.bo_norm)
    # As above, standardize by the standard deviation for the Gaussian
    # ReLU moment.
    r = mean_out.div(torch.sqrt(variance_out))
    dist = Normal(torch.zeros_like(mean_out), torch.ones_like(mean_out))
    return mean_out.mul(dist.cdf(r)).add(
        torch.sqrt(variance_out).mul(torch.exp(dist.log_prob(r))))
def forward(self, x):
    batch_size, c_in = x.size(0), x.size(1)
    # x.size() is (B, c_in, h, w)
    h_and_w = x.size()[2:]
    out = self.model(x)
    # out.size() is (B, c_in * 3 * n_components, h, w)
    out = out.view(batch_size, 3 * self.n_components, c_in, *h_and_w)
    # out.size() is (B, 3 * n_components, c_in, h, w)
    mus, log_sigmas, weight_logits = torch.chunk(out, 3, dim=1)
    # each is (B, n_components, c_in, h, w)
    weights = F.softmax(weight_logits, dim=1)
    distribution = Normal(mus, log_sigmas.exp())
    x = x.unsqueeze(1)
    # x.size() is (B, 1, c_in, h, w)
    z = distribution.cdf(x)
    # z.size() is (B, n_components, c_in, h, w)
    z = (z * weights).sum(1)
    # z.size() is (B, c_in, h, w)
    log_dz_by_dx = (distribution.log_prob(x).exp() * weights).sum(1).log()
    return z, log_dz_by_dx
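# Not from the source: a shape walk-through for the image flow above, using
# a throwaway conv net in place of self.model.
import torch
import torch.nn as nn

B, c_in, h, w, n_components = 2, 3, 8, 8, 5
model = nn.Conv2d(c_in, c_in * 3 * n_components, kernel_size=3, padding=1)
out = model(torch.randn(B, c_in, h, w))
print(out.shape)  # (B, c_in * 3 * n_components, h, w) == (2, 45, 8, 8)
print(out.view(B, 3 * n_components, c_in, h, w).shape)  # (2, 15, 3, 8, 8)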
def forward(self, candidate_set):
    self.grid_size = 10000
    self.gp_model.eval()
    self.gp_model.likelihood.eval()
    pred = self.gp_model.likelihood(self.gp_model(candidate_set))
    mu = pred.mean().detach()
    sigma = pred.std().detach()
    # Standardized improvement over the incumbent.
    u = (self.best_y - mu) / sigma
    m = Normal(torch.Tensor([0.0]), torch.Tensor([1.0]))
    ucdf = m.cdf(u)
    updf = torch.exp(m.log_prob(u))
    # Closed-form expected improvement: sigma * (phi(u) + u * Phi(u)).
    ei = sigma * (updf + u * ucdf)
    return ei
def forward(self, x: Tensor) -> Tensor:
    best_f = torch.max(self.model.y).to(x)
    posterior_norm = self.model(x)
    posterior_mu = posterior_norm.mean
    posterior_cov = posterior_norm.covariance_matrix
    sigma = posterior_cov.diag().sqrt().clamp_min(1e-9).view(
        posterior_mu.shape)
    u = (posterior_mu - best_f.expand_as(posterior_mu) - self.alpha) / sigma
    normal = Normal(torch.zeros_like(u), torch.ones_like(u))
    ucdf = normal.cdf(u)
    updf = torch.exp(normal.log_prob(u))
    ei = sigma * (updf + u * ucdf)
    ei[torch.isnan(ei)] = 0.0
    return ei
class ProbabilisticActionSelector(object):
    def __init__(self, policy_net, INITIAL_EPSILON, FINAL_EPSILON, EPS_DECAY,
                 n_actions, device):
        self._eps = INITIAL_EPSILON
        self._FINAL_EPSILON = FINAL_EPSILON
        self._INITIAL_EPSILON = INITIAL_EPSILON
        self._policy_net = policy_net
        self._EPS_DECAY = EPS_DECAY
        self._n_actions = n_actions
        self._device = device
        self._dist = Normal(0, 1)

    def select_action(self, state, training=True):
        sample = random.random()
        if training:
            self._eps -= (self._INITIAL_EPSILON
                          - self._FINAL_EPSILON) / self._EPS_DECAY
            self._eps = max(self._eps, self._FINAL_EPSILON)
        action_mean = None
        action_var = None
        if sample > self._eps:
            with torch.no_grad():
                q_vals = torch.zeros(
                    (self._policy_net.get_num_ensembles(), self._n_actions))
                state = state.to(self._device)
                for i in range(self._policy_net.get_num_ensembles()):
                    q_vals[i, :] = self._policy_net(
                        state, ens_num=i).to('cpu').squeeze(0)
                action_mean = torch.mean(q_vals, 0)
                action_var = torch.var(q_vals, 0)
                top_idx = torch.argmax(action_mean)
                # Score each action by the probability that it outperforms
                # the current best action under the ensemble's estimates.
                score = torch.zeros((self._n_actions))
                for i in range(self._n_actions):
                    normal_val = (action_mean[top_idx] - action_mean[i]) / \
                        (action_var[top_idx] + action_var[i])
                    score[i] = 1. - self._dist.cdf(normal_val)
                action_dist = Multinomial(1, score)
                a = action_dist.sample().argmax().item()
        else:
            a = torch.tensor([[random.randrange(self._n_actions)]],
                             device='cpu',
                             dtype=torch.long).numpy()[0, 0].item()
        return a, self._eps, action_mean, action_var
class labels_transformer():
    def __init__(self):
        self.labels_mean = torch.FloatTensor([
            0.4761464174454829, 0.5202864583333333, 0.5481813186813186,
            0.5227313915857604, 0.5037803738317757, 0.5662814814814815
        ])
        self.labels_std = torch.FloatTensor([
            0.15228452985134602, 0.15353347248058757, 0.13637365282783034,
            0.15520650375390665, 0.15013557786759546, 0.14697755975897248
        ])
        self.dist = Normal(0, 1)

    def transform_labels(self, true_labels):
        # Standardize, then squash through the standard normal CDF.
        pseudo_labels = (true_labels - self.labels_mean) / self.labels_std
        pseudo_labels = self.dist.cdf(pseudo_labels)
        return pseudo_labels

    def inverse_transform_labels(self, pseudo_labels):
        true_labels = self.dist.icdf(pseudo_labels)
        true_labels = true_labels * self.labels_std + self.labels_mean
        return true_labels
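# Not from the source: round-trip usage of labels_transformer; the
# CDF-squashed pseudo-labels invert back to the original labels.
import torch

t = labels_transformer()
true_labels = torch.FloatTensor([0.4, 0.5, 0.6, 0.5, 0.5, 0.55])
pseudo = t.transform_labels(true_labels)        # each value now in (0, 1)
recovered = t.inverse_transform_labels(pseudo)  # inverts the transform
print(torch.allclose(recovered, true_labels, atol=1e-4))  # True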