def forward(self, obs, deterministic=False, with_logprob=True):
    """Produce a tanh-squashed Gaussian action and its log-probabilities.

    Args:
        obs: Observation tensor fed to ``self.net``.
        deterministic: If True, use the Gaussian mean instead of sampling
            (only used for evaluating the policy at test time).
        with_logprob: If True, also compute the log-probabilities.

    Returns:
        Tuple ``(pi_action, logp_pi, real_logp_pi)``:
        ``pi_action`` is ``self.act_limit * tanh(u)`` for the pre-squash
        action ``u``; ``logp_pi`` is the squash-corrected log-probability
        summed over the last axis; ``real_logp_pi`` is the per-dimension
        Gaussian log-probability of ``u`` without any correction. The last
        two are ``None`` when ``with_logprob`` is False.
    """
    net_out = self.net(obs)
    mu = self.mu_layer(net_out)
    log_std = torch.clamp(self.log_std_layer(net_out), LOG_STD_MIN, LOG_STD_MAX)
    std = torch.exp(log_std)

    # Pre-squash distribution and sample.
    pi_distribution = Normal(mu, std)
    pi_action = mu if deterministic else pi_distribution.rsample()

    logp_pi = None
    real_logp_pi = None
    if with_logprob:
        real_logp_pi = pi_distribution.log_prob(pi_action)
        logp_pi = real_logp_pi.sum(axis=-1)
        # Tanh-squash correction: a numerically stable equivalent of Eq. 21
        # in appendix C of the SAC paper (arXiv 1801.01290).
        # Both reductions use the last axis so that unbatched inputs and
        # inputs with extra leading axes are reduced consistently.
        logp_pi -= (2 * (np.log(2) - pi_action
                         - F.softplus(-2 * pi_action))).sum(axis=-1)

    pi_action = self.act_limit * torch.tanh(pi_action)
    return pi_action, logp_pi, real_logp_pi
def forward(self, x):
    """Run the VAE and compute reconstruction and KL loss terms.

    Args:
        x (torch.Tensor): Input images [batch size, 3, dim, dim]

    Returns:
        Tuple ``(recon, losses, stats, None, None)`` where ``losses`` is an
        ``AttrDict`` with ``err`` (negative Gaussian log-likelihood summed
        over dims 1-3) and ``kl_l`` (single-sample KL estimate).
    """
    recon, stats = self.vae(x)
    if self.pixel_bound:
        recon = torch.sigmoid(recon)

    # Reconstruction term: Gaussian likelihood with a fixed pixel std.
    err = -Normal(recon, self.pixel_std).log_prob(x).sum(dim=(1, 2, 3))

    # KL term against a standard-normal prior.
    prior = Normal(0, 1)
    if 'z' in stats:
        # No flow: posterior and prior are both evaluated at the sample z.
        posterior = Normal(stats.mu, stats.sigma)
        kl = (posterior.log_prob(stats.z) - prior.log_prob(stats.z)).sum(dim=1)
    else:
        # Normalising flow: base density at z_0, prior at z_k, minus the
        # log-det-Jacobian reported by the flow.
        base = Normal(stats.mu_0, stats.sigma_0)
        log_ratio = base.log_prob(stats.z_0) - prior.log_prob(stats.z_k)
        kl = log_ratio.sum(dim=1) - stats.ldj

    return recon, AttrDict(err=err, kl_l=kl), stats, None, None
def compute_stochastic_elbo(a, b, nu, omega, x, y, a_0, b_0, mu_0):
    """Return a single-sample Monte Carlo estimate of the ELBO.

    The sample is drawn from the mean-field posterior Q(tau, beta).

    Args:
        a, b: Gamma 'shape' and 'rate' of the variational posterior over the
            precision, q(tau) = q(sigma^-2).
        nu, omega: Normal 'mean' and 'precision' of the variational
            posterior over the weights, q(beta_k).
        x: n by k matrix; each row holds the regression inputs.
        y: n by 1 targets.
        a_0, b_0: Parameters of the Gamma prior over the precision.
        mu_0: Mean of the Normal prior on the weights beta.

    Returns:
        The ELBO estimate ``log p(y | beta, tau) - KL_tau - KL_beta``.
    """
    q_tau = Gamma(a, b)
    q_beta = Normal(nu, omega**-0.5)

    # rsample keeps the draw differentiable w.r.t. the variational params.
    # tau is drawn before beta; the order fixes the random stream.
    tau = q_tau.rsample()
    noise_sd = tau**-0.5
    beta = q_beta.rsample()

    prior_tau = Gamma(a_0, b_0)
    prior_beta = Normal(mu_0, noise_sd)
    y_mean = (beta[None, :] * x).sum(dim=1, keepdim=True)

    log_likelihood = Normal(y_mean, noise_sd).log_prob(y).sum()
    kl_tau = q_tau.log_prob(tau) - prior_tau.log_prob(tau)
    kl_beta = q_beta.log_prob(beta).sum() - prior_beta.log_prob(beta).sum()
    return log_likelihood - kl_tau - kl_beta
def log_likelihood(self, x_norm, y_norm):
    """Mean log-likelihood of targets under a Normal/Gamma mixture.

    ``self(x_norm)`` must return ``(mean, var, shape, rate, mixture_var)``.
    The Normal component is evaluated on ``y_norm``; the Gamma component on
    the de-normalised target ``y_norm * self.y_std + self.y_mean + 1e-4``.
    A component is masked out when its mixture weight is within 1e-4 of 0.

    Returns:
        Scalar tensor: mean of the log mixture density.
    """
    mean, var, shape, rate, mixture_var = self(x_norm)
    norm_dist = Normal(mean, torch.sqrt(var))
    gamma_dist = Gamma(shape, rate)
    y = y_norm * self.y_std + self.y_mean + 10**(-4)

    normal_logp = norm_dist.log_prob(y_norm)
    gamma_logp = gamma_dist.log_prob(y)

    # 1.0 where the mixture has (numerically) collapsed onto one component.
    only_normal_bool = (torch.abs(1 - mixture_var) < 10**(-4)).type(torch.float)
    only_gamma_bool = (mixture_var < 10**(-4)).type(torch.float)

    logging.debug('shape,rate: {:.3f}, {:.3f}'.format(
        float(shape.mean()), float(rate.mean())))

    # Unmasked logsumexp variant, computed only for the debug comparison.
    stacked = torch.stack((normal_logp + torch.log(mixture_var),
                           gamma_logp + torch.log(1 - mixture_var)), dim=0)
    old_output = torch.logsumexp(stacked, dim=0).mean()

    normal_term = (1 - only_gamma_bool) * mixture_var * torch.exp(normal_logp)
    gamma_term = ((1 - only_normal_bool) * (1 - mixture_var)
                  * torch.exp(gamma_logp))
    output = torch.log(normal_term + gamma_term).mean()

    logging.debug('Mixture var: {}'.format(float(mixture_var.mean())))
    logging.debug('NLLs: {:.3f}, {:.3f}'.format(
        -float(normal_logp.mean()), -float(gamma_logp.mean())))
    logging.debug('Combined NLL: {:.3f} or {:.3f}'.format(
        -float(output), -float(old_output)))
    return output
def test_gmm_loss(self):
    """Check ``gmm_loss`` against a hand-computed two-component mixture.

    Uses one time step, one batch element, two Gaussians and two features,
    and compares ``gmm_loss`` with ``-log(p1 + p2)`` built from scalar
    ``Normal`` densities.
    """
    # seq_len x batch_size x gaussian_size x feature_size: 1 x 1 x 2 x 2
    mus = torch.tensor([[[[0.0, 0.0], [6.0, 6.0]]]])
    sigmas = torch.tensor([[[[2.0, 2.0], [2.0, 2.0]]]])
    # seq_len x batch_size x gaussian_size
    pi = torch.tensor([[[0.5, 0.5]]])
    logpi = torch.log(pi)
    # seq_len x batch_size x feature_size
    batch = torch.tensor([[[3.0, 3.0]]])

    gl = gmm_loss(batch, mus, sigmas, logpi)

    # Weighted density of each component: product over the two features.
    weighted = []
    for k in range(2):
        density = pi[0, 0, k]
        for f in range(2):
            dist = Normal(mus[0, 0, k, f], sigmas[0, 0, k, f])
            density = density * torch.exp(dist.log_prob(batch[0, 0, f]))
        weighted.append(density)
    p1, p2 = weighted
    expected = -torch.log(p1 + p2)

    logger.info(
        "gmm loss={}, p1={}, p2={}, p1+p2={}, -log(p1+p2)={}".format(
            gl, p1, p2, p1 + p2, expected))
    # Two float32 code paths are compared; exact equality is brittle, so
    # compare within floating-point tolerance instead.
    assert torch.isclose(expected, gl)
class GaussianModel(nn.Module):
    r"""
    Model to learn a univariate Gaussian distribution.

    Arguments
    ----------
    mu: Mean of the Gaussian distribution
    sigma: Standard deviation of the Gaussian distribution
    device: The torch.device to use, typically cpu or gpu id
    """

    def __init__(self, mu, sigma, device=None):
        super(GaussianModel, self).__init__()
        # Always define the attribute so reading ``self.device`` never
        # raises, even when no device was requested.
        self.device = device
        if device is not None:
            mu = mu.to(device)
            sigma = sigma.to(device)
        self.mu = mu
        self.sigma = sigma
        self.distr = Normal(self.mu, self.sigma)

    def to_device(self, device):
        """
        Moves members to a specified torch.device and rebuilds ``distr``.
        """
        self.device = device
        # Registered parameters/buffers (if any) are handled by nn.Module.
        self.to(device)
        # Plain tensor attributes live in the instance __dict__ and are not
        # touched by nn.Module.to, so move them explicitly.
        for name, value in list(vars(self).items()):
            if torch.is_tensor(value):
                setattr(self, name, value.to(device))
        self.distr = Normal(self.mu, self.sigma)

    def forward(self, x):
        """
        Takes input x as new distribution parameters.

        For 2-D (mini-batch) input, column 0 is stored as ``mu_batch`` and
        softplus of column 1 as ``sigma_batch``; otherwise ``x[0]`` becomes
        the new ``mu``. Returns the distribution built from ``mu``/``sigma``.
        """
        if len(x.shape) > 1:
            # Mini-batching: per-sample parameters are consumed by log_prob.
            self.mu_batch = x[:, 0]
            # NOTE(review): sigma_batch is stored but never read in this
            # class; log_prob keeps using self.sigma. Confirm intent.
            self.sigma_batch = F.softplus(x[:, 1])
        else:
            self.mu = x[0]
        self.distr = Normal(self.mu, self.sigma)
        return self.distr

    def log_prob(self, x):
        """
        Log-probability of ``x``.

        A single-element ``x`` is scored under the current ``distr``.
        Otherwise element ``i`` is scored under ``Normal(mu_batch[i], sigma)``;
        this requires a preceding mini-batch ``forward`` call and leaves
        ``mu``/``distr`` set to the last batch element.
        """
        x = x.view(-1)
        if x.shape[0] == 1:
            return self.distr.log_prob(x[0]).view(1)
        log_like_arr = torch.ones_like(x)
        for i, x_i in enumerate(x):
            self.mu = self.mu_batch[i]
            self.distr = Normal(self.mu, self.sigma)
            log_like_arr[i] = self.distr.log_prob(x_i).view(1)
        return log_like_arr

    def icdf(self, value):
        """Inverse CDF of the current distribution at ``value``."""
        return self.distr.icdf(value)
def forward(self, x, a=None):
    """Sample an action from the Gaussian policy and score actions.

    Args:
        x: Input passed to ``self.p_net`` to obtain the mean.
        a: Optional action tensor to score; anything that is not a tensor
            is ignored.

    Returns:
        ``(pi, logp, logp_pi)``: the sampled action, the log-probability of
        ``a`` summed over dim 1 (``None`` if ``a`` is not a tensor), and the
        log-probability of ``pi`` summed over dim 1.
    """
    policy = Normal(self.p_net(x), self.log_std.exp())
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    logp = None
    if torch.is_tensor(a):
        logp = policy.log_prob(a).sum(dim=1)
    return pi, logp, logp_pi
def forward(self, z_where_t, z_where_t_1=None, disp=None):
    """Log prior of ``z_where_t``, summed over the last axis.

    With no previous state the prior is ``Normal(prior_mu0, prior_Sigma0)``;
    otherwise it is centred on ``z_where_t_1`` with scale ``prior_Sigmat``.
    ``disp`` is accepted but not used. ``z_where_t`` must be 3-D.
    """
    # Unpacking enforces a 3-D input (raises otherwise), as in the original.
    _samples, _batch, _dim = z_where_t.shape
    if z_where_t_1 is None:
        prior = Normal(self.prior_mu0, self.prior_Sigma0)
    else:
        prior = Normal(z_where_t_1, self.prior_Sigmat)
    return prior.log_prob(z_where_t).sum(-1)  # S * B
def actor(self, obs, action=None, shared=None):
    """Sample an action or score a given one under the Gaussian policy.

    Args:
        obs: Observation; only used (via ``toTensor`` and
            ``self.shared_body``) when ``shared`` is not supplied.
        action: If given, only its log-probability is returned.
        shared: Optional precomputed shared features.

    Returns:
        ``log_prob`` summed over the last axis when ``action`` is given,
        otherwise ``(sampled_action, log_prob)``.
    """
    if shared is None:
        shared = self.shared_body(toTensor(obs))
    mean = self.fc_action(self.actor_body(shared))
    dist = Normal(mean, F.softplus(self.std))
    if action is not None:
        return dist.log_prob(action).sum(-1)
    sampled = dist.sample()
    return sampled, dist.log_prob(sampled).sum(-1)
def expected_log_pdf(i):
    """Log-density of ``logspec0`` for source index ``i``.

    For ``i < self.n_speakers`` this is the sum over GMM components of the
    component log-density weighted by ``self.qz[i, c]``; for any other index
    the noise model's log-density is returned. ``self`` and ``logspec0`` come
    from the enclosing scope.
    """
    if i >= self.n_speakers:
        return self.noise_model.log_prob(logspec0)

    weighted = torch.zeros_like(logspec0)
    for c in range(self.gmm['n_components']):
        component = Normal(self.gmm['means'][c], self.gmm['stds'][c])
        # In-place accumulation keeps the result in logspec0's shape.
        weighted += self.qz[i, c][:, None] * component.log_prob(logspec0)
    return weighted
def forward(self, x, a=None):
    """Sample from the Gaussian policy and return log-probabilities.

    Returns:
        ``(pi, logp, logp_pi)``: a sample, the log-probability of ``a``
        summed over dim 1 (``None`` when ``a`` is None), and the
        log-probability of the sample summed over dim 1.
    """
    policy = Normal(self.mu(x), self.log_std.exp())
    pi = policy.sample()
    logp = None if a is None else policy.log_prob(a).sum(dim=1)
    logp_pi = policy.log_prob(pi).sum(dim=1)
    return pi, logp, logp_pi
def pathological_mixture(x):
    """Log-sum-exp of two scaled Gaussian log-densities evaluated at ``x``.

    The first component is ``Normal(0, 0.5)``; the second is
    ``Normal(1, 0.15)`` with its log-density multiplied by 200. The two are
    placed side by side as columns and combined with ``LogSumExp`` over dim 1.
    """
    broad = Normal(torch.zeros(1), torch.tensor([0.5]))
    narrow = Normal(torch.tensor([1.0]), torch.tensor([0.15]))
    columns = [
        broad.log_prob(x).view(-1, 1),
        narrow.log_prob(x).mul(200).view(-1, 1),
    ]
    # logsumexp trick
    return LogSumExp(torch.cat(columns, dim=1), dim=1)
def forward(self, x: torch.Tensor, a: torch.Tensor) \
        -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Sample from a Gaussian policy whose mean depends on ``x`` and ``a``.

    Returns:
        ``(pi, logp, logp_pi)``: a sample, the log-probability of ``a``
        summed over dim 1 (``None`` when ``a`` is None), and the
        log-probability of the sample summed over dim 1.
    """
    policy = Normal(self.mu(x, a), self.log_std.exp())
    pi = policy.sample()
    logp = None if a is None else policy.log_prob(a).sum(dim=1)
    logp_pi = policy.log_prob(pi).sum(dim=1)
    return pi, logp, logp_pi
def update_qZ(self, logspec0):
    '''Updates q(Z) approximate posterior with a few SGD steps.

    Args:
        logspec0 (torch.Tensor): Spectral features of shape (T,F)
    '''
    def _mixed_loglik(weight, dist):
        # weight * clamped log-density + (1 - weight) * log(cdf + eps)
        term = weight * torch.clamp(dist.log_prob(logspec0), -14, 100)
        term += (1 - weight) * (dist.cdf(logspec0) + 1e-6).log()
        return term

    mu_vs, logvar_vs, mu_us, logvar_us = self.qz
    param_to_optimize = tuple(self.qz)
    optim_z = optim.SGD(param_to_optimize, lr=self.qz_learn_rate)
    n_t = logspec0.shape[0]

    for i_iter in range(self.qz_n_updates):
        optim_z.zero_grad()
        kls, explogliks = [], []

        for _ in range(self.n_z_samples):
            # sample q(Z1), q(Z2); u is drawn before v
            u_samp = sample_normal(mu_us, logvar_us)
            v_samp = sample_normal(mu_vs, logvar_vs)

            # forward Z1, Z2 through VAE decoder
            mu_x, logvar_x = self.vae.emission(
                v_samp, u_samp, [n_t] * self.n_speakers)
            mu_x = mu_x[:, :n_t, :]
            logvar_x = logvar_x[:, :n_t, :]
            speaker_dist = Normal(mu_x, torch.exp(0.5 * logvar_x))

            # compute loss Eq.(21)
            kluv, _ = self.vae.kl_divergence_expected_speaker(
                ((mu_vs, logvar_vs), (mu_us, logvar_us), u_samp, v_samp),
                torch.tensor([n_t]))
            kls.append(kluv.sum() / self.n_z_samples)

            speaker_term = _mixed_loglik(
                self.qd[:self.n_speakers], speaker_dist)
            explogliks.append(speaker_term.sum() / self.n_z_samples)

        # Noise-model term: added once per update, not per sample.
        noise_term = _mixed_loglik(self.qd[-1], self.noise_model)
        explogliks.append(noise_term.sum())

        objf = -(torch.sum(torch.stack(explogliks))
                 - self.kl_weight * torch.sum(torch.stack(kls)))
        objf.backward()
        for p in param_to_optimize:
            nn.utils.clip_grad_value_(p, 5)
        optim_z.step()

    self.qz = (mu_vs, logvar_vs, mu_us, logvar_us)
def select_action(state, env, teacher_mod, teacher_student):
    """Sample teacher and student actions for one environment and record them.

    Args:
        state: NumPy observation; converted to a float tensor.
        env: Environment id, 1 or 2; selects which output head is used.
        teacher_mod: Teacher network, called on the state; returns
            ``(mu1, var1, mu2, var2, val1, val2)`` like the global ``model``.
        teacher_student: If equal to 1 the student's log-prob/value pair is
            saved for the student, otherwise the teacher's pair.

    Returns:
        The student's sampled action as a Python scalar.

    Raises:
        ValueError: If ``env`` is neither 1 nor 2 (previously this surfaced
            as an unbound-local error at the final ``action.item()``).
    """
    if env not in (1, 2):
        raise ValueError("env must be 1 or 2, got {!r}".format(env))

    state = torch.from_numpy(state).float()
    mu1, s1, mu2, s2, val1, val2 = model(state)
    tmu1, ts1, tmu2, ts2, tval1, tval2 = teacher_mod(state)
    if env == 1:
        mu, var, val = mu1, s1, val1
        tmu, tvar, tval = tmu1, ts1, tval1
    else:
        mu, var, val = mu2, s2, val2
        tmu, tvar, tval = tmu2, ts2, tval2

    # Teacher draw comes first so the random stream order is unchanged.
    # The networks' second outputs are treated as variances (sqrt -> std).
    teacher_dist = Normal(tmu, tvar.sqrt())
    log_prob_t = teacher_dist.log_prob(teacher_dist.sample())
    if env == 1:
        teacher_mod.saved_actions_env1.append((tmu, tvar))
    else:
        # NOTE(review): env 2 also appends to ``saved_actions_env1`` and
        # stores a SavedAction rather than a (mu, var) tuple. Preserved
        # from the original; confirm this is intended.
        teacher_mod.saved_actions_env1.append(SavedAction(log_prob_t, tval))

    student_dist = Normal(mu, var.sqrt())
    entropy = 0.5 * ((var * 2 * pi).log() + 1)
    action = student_dist.sample()
    log_prob_s = student_dist.log_prob(action)
    model.entropies.append(entropy)
    model.samples_student.append((mu, var))

    if teacher_student == 1:
        saved = SavedAction(log_prob_s, val)
    else:
        saved = SavedAction(log_prob_t, tval)
    model.saved_actions_student[env].append(saved)
    return action.item()
class ScaleMixturePrior():
    """Two-component zero-mean Gaussian scale-mixture prior.

    The density is ``pi * N(0, sigma1) + (1 - pi) * N(0, sigma2)``.
    """

    def __init__(self, pi, sigma1, sigma2):
        self.pi = pi
        self.sigma1 = sigma1
        self.sigma2 = sigma2
        self.normal1 = Normal(0, sigma1)
        self.normal2 = Normal(0, sigma2)

    def log_prob(self, x):
        """Return the summed log-density of the mixture over all elements of ``x``.

        The original returned the sum of the mixture *densities* (no log),
        which is not a log-probability. The mixture is evaluated in log
        space with logsumexp so far-tail values do not underflow to
        ``log(0)``.
        """
        log_p1 = self.normal1.log_prob(x)
        log_p2 = self.normal2.log_prob(x)
        pi = torch.as_tensor(self.pi, dtype=log_p1.dtype, device=log_p1.device)
        log_mix = torch.logsumexp(
            torch.stack([torch.log(pi) + log_p1,
                         torch.log1p(-pi) + log_p2]),
            dim=0)
        return log_mix.sum()
def step(self, state):
    """Sample an action for ``state`` and return it with its log-probability.

    Args:
        state: NumPy observation; flattened and converted to float.

    Returns:
        ``(action, log_prob)``: the sampled action as a NumPy array of shape
        ``self.action_shape`` and the tensor of its log-probabilities under
        ``Normal(mean, sigma)`` produced by ``self(state)``.
    """
    state = torch.flatten(torch.from_numpy(state).float())
    mean, sigma = self(state)
    dist = Normal(mean, sigma)
    action = dist.sample().view(self.action_shape)
    # The former debug code called torch.normal(action, mean, sigma), which
    # is not a valid signature and raised before the return, and also drew
    # an extra unused sample. It has been removed together with the prints.
    log_prob = dist.log_prob(action)
    return action.numpy(), log_prob
def forward(self, x, a=None):
    """Sample from the Gaussian policy and return log-probabilities and mean.

    Returns:
        ``(pi, logp, logp_pi, mu)``: a sample, the log-probability of ``a``
        summed over dim 1 (``None`` when ``a`` is None), the log-probability
        of the sample summed over dim 1, and the policy mean.
    """
    mu = self.mu(x)
    policy = Normal(mu, self.log_std.exp())
    pi = policy.sample()
    # Gaussian likelihood of the sampled action.
    logp_pi = policy.log_prob(pi).sum(dim=1)
    logp = None if a is None else policy.log_prob(a).sum(dim=1)
    # Order matched to the ActorCritic return values.
    return pi, logp, logp_pi, mu
class ScaledGaussianMixture(Parameter):
    """Scaled Gaussian Mixture

    Scaled Mixture of Gaussians. Do not compute samples as this
    distribution is only used for the weight priors.

    Attributes:
        pi (float): interpolation factor between the two gaussians basis
        sigma1 (float): sigma for the first gaussian
        sigma2 (float): sigma for the second gaussian
        gaussian1 (Normal): normal distribution for the first gaussian
        gaussian2 (Normal): normal distribution for the second gaussian
    """

    def __init__(self, pi: float, sigma1: float, sigma2: float) -> None:
        """Initialize

        Arguments:
            pi (float): interpolation factor between the two gaussians basis
            sigma1 (float): sigma for the first gaussian
            sigma2 (float): sigma for the second gaussian
        """
        super(ScaledGaussianMixture, self).__init__()

        # Constants are registered as non-trainable parameters.
        self.register_parameter("pi", nn.Parameter(torch.tensor(pi).float(), requires_grad = False))
        self.register_parameter("sigma1", nn.Parameter(torch.tensor(sigma1).float(), requires_grad = False))
        self.register_parameter("sigma2", nn.Parameter(torch.tensor(sigma2).float(), requires_grad = False))
        self.register_parameter("zero", nn.Parameter(torch.tensor(0.).float(), requires_grad = False))

        self.gaussian1 = Normal(self.zero, self.sigma1)
        self.gaussian2 = Normal(self.zero, self.sigma2)

    def sample(self) -> Tensor:
        """Sample

        Is not implemented for now for reasons stated above. Thus returns
        0.0 for the moment.
        """
        return 0.0

    def log_prob(self, input: Tensor) -> Tensor:
        """Scale Gaussian Mixture Log Probability

        Computes ``log(pi * N1(input) + (1 - pi) * N2(input))`` summed over
        all elements. The mixture is combined in log space (logsumexp)
        rather than by exponentiating each component, so tail values whose
        densities underflow no longer produce ``log(0) = -inf``.

        Arguments:
            input (Tensor): sampled value of the gaussian weight

        Returns:
            Tensor: log probability
        """
        log_mix = torch.logsumexp(
            torch.stack([
                torch.log(self.pi) + self.gaussian1.log_prob(input),
                torch.log1p(-self.pi) + self.gaussian2.log_prob(input),
            ]),
            dim=0)
        return log_mix.sum()
def plot_dist2(n_components, mixture_weights, true_mixture_weights, exp_dir, name=''):
    """Plot the current and true Gaussian mixtures and save the figure.

    Component ``c`` is ``Normal(10 * c, 5)``. Each weighted component and the
    mixture sum are drawn for both weight vectors, and the figure is written
    to ``exp_dir + 'gmm_plot_dist' + name + '.png'``.
    """
    rows, cols = 1, 1
    fig = plt.figure(figsize=(10+cols, 4+rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-10, n_components*10 + 5, 300)

    def draw_mixture(weights, component_color, total_label, total_color):
        # Draw every weighted component, then their sum.
        total = np.zeros(len(xs))
        for c in range(n_components):
            m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
            ys = [
                (torch.exp(m.log_prob(x)) * weights[c]).detach().cpu().numpy()
                for x in xs
            ]
            ys = np.reshape(np.array(ys), [-1])
            total += ys
            ax.plot(xs, ys, label='', c=component_color)
        ax.plot(xs, total, label=total_label, c=total_color)

    draw_mixture(mixture_weights, 'orange', 'current', 'r')
    draw_mixture(true_mixture_weights, 'c', 'true', 'b')

    ax.legend()
    ax.set_title(str(mixture_weights) + '\n' + str(true_mixture_weights), size=8, family='serif')

    plt_path = exp_dir+'gmm_plot_dist'+name+'.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
def forward(self, x, S):
    """Compute the combined model/proposal loss from ``S`` samples per input.

    Importance weights from the encoder proposal ``q_phi`` are used to
    moment-match a full-covariance Gaussian ``r_phi``; samples from ``r_phi``
    then weight the encoder's log-density loss. The model loss is weighted
    by the ``q_phi`` samples for version ``'v1'`` and by the ``r_phi`` samples
    for ``'v2'``.

    Args:
        x: Inputs, reshaped to ``(-1, self.x_dim)``.
        S: Number of samples per input (must be > 1 because of the
            ``S / (S - 1)`` covariance scaling).

    Returns:
        Scalar tensor: sum of ``q_loss + p_loss`` over samples and batch.

    Raises:
        ValueError: If ``self.version`` is neither ``'v1'`` nor ``'v2'``
            (previously an unbound ``p_loss`` at the very end).
    """
    if self.version not in ('v1', 'v2'):
        raise ValueError(
            "unsupported version {!r}; expected 'v1' or 'v2'".format(self.version))

    x = x.view(-1, self.x_dim)

    ### get w and \alpha and L(\theta)
    mu, logvar = self.encoder(x)
    q_phi = Normal(loc=mu, scale=torch.exp(0.5 * logvar))
    z_q = q_phi.rsample((S, ))
    x_dist = Bernoulli(logits=self.decoder(z_q))
    log_lik = x_dist.log_prob(x).sum(-1)
    log_prior = self.prior.log_prob(z_q).sum(-1)
    log_q = q_phi.log_prob(z_q).sum(-1)

    log_w = log_lik + log_prior - log_q
    # Self-normalised importance weights over the sample axis.
    alpha = torch.exp(log_w - torch.logsumexp(log_w, dim=0).unsqueeze(0)).detach()
    if self.version == 'v1':
        p_loss = -alpha * (log_lik + log_prior)

    ### get moment-matched proposal
    mu_r = (alpha.unsqueeze(2) * z_q).sum(0).detach()
    diff = z_q - mu_r.unsqueeze(0)
    # Per-sample outer products, shape (S, bsz, z_dim, z_dim).
    outer = diff.unsqueeze(-1) * diff.unsqueeze(-2)
    Sigma_r = outer.mean(0) * S / (S - 1)
    # Ridge built on Sigma_r's own device/dtype rather than on a
    # module-level ``device`` global.
    ridge = torch.eye(self.z_dim, device=Sigma_r.device, dtype=Sigma_r.dtype)
    Sigma_r = Sigma_r + ridge * 1e-6  ## ridging

    ### get v, \beta, and L(\phi)
    # torch.cholesky is deprecated; prefer torch.linalg.cholesky when the
    # installed torch provides it.
    cholesky = getattr(getattr(torch, 'linalg', None), 'cholesky', None) or torch.cholesky
    L = cholesky(Sigma_r)
    r_phi = MultivariateNormal(loc=mu_r, scale_tril=L)
    z_r = r_phi.rsample((S, )).detach()
    x_dist_r = Bernoulli(logits=self.decoder(z_r))
    log_lik_r = x_dist_r.log_prob(x).sum(-1)
    log_prior_r = self.prior.log_prob(z_r).sum(-1)
    log_r = r_phi.log_prob(z_r)

    log_v = log_lik_r + log_prior_r - log_r
    beta = torch.exp(log_v - torch.logsumexp(log_v, dim=0).unsqueeze(0)).detach()
    q_loss = -beta * q_phi.log_prob(z_r).sum(-1)

    if self.version == 'v2':
        p_loss = -beta * (log_lik_r + log_prior_r)

    return torch.sum(q_loss + p_loss, 0).sum()
def loglik(self, y_pred, y_obs):
    """Summed log-likelihood of ``y_obs`` given predictions ``y_pred``.

    ``self.likelihood`` selects the observation model: ``"Gaussian"`` uses
    scale ``1e-6 + softplus(self.noise_sd)``; ``"Bernoulli"`` treats
    ``y_pred`` as logits.

    Raises:
        NotImplementedError: For any other likelihood name.
    """
    if self.likelihood == "Gaussian":
        p_data = Normal(loc=y_pred, scale=1e-6 + softplus(self.noise_sd))
    elif self.likelihood == "Bernoulli":
        p_data = Bernoulli(logits=y_pred)
    else:
        raise NotImplementedError("Other likelihoods not implemented")
    return p_data.log_prob(y_obs).sum()
def forward(self, x, a=None, batch=False):
    """Sample from the Gaussian policy and return log-probabilities.

    Args:
        x: Input passed to ``self.mu`` to obtain the mean.
        a: Optional action to score.
        batch: Kept for backward compatibility; it previously only triggered
            a leftover ``pdb.set_trace()`` breakpoint and now has no effect.

    Returns:
        ``(pi, logp, logp_pi)``: a sample, the log-probability of ``a``
        summed over dim 1 (``None`` when ``a`` is None), and the
        log-probability of the sample summed over dim 1.
    """
    policy = Normal(self.mu(x), self.log_std.exp())
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    logp = None if a is None else policy.log_prob(a).sum(dim=1)
    return pi, logp, logp_pi
def forward(self, x, with_logprob=False):
    """Sample a tanh-squashed action rescaled to the action bounds.

    Args:
        x: Input passed through ``self.layers``.
        with_logprob: If True, also return the squash-corrected
            log-probability summed over the last axis.

    Returns:
        ``action`` alone, or ``(action, logp)`` when ``with_logprob`` is
        True. ``action`` is ``tanh`` output mapped from (-1, 1) to
        [``self.action_low``, ``self.action_high``].
    """
    x = self.layers(x)
    mean = self.mean_layer(x)
    std = self.log_std_layer(x).clamp(-20, 2).exp()

    pi_distribution = Normal(mean, std)
    pi_action = pi_distribution.rsample()

    squashed = torch.tanh(pi_action)  # [N, action_dim]
    # scale (-1, 1) to [action.low, action_high]
    action = (squashed + 1) * (self.action_high - self.action_low) / 2 + self.action_low
    if not with_logprob:
        return action

    # Gaussian log-prob plus the tanh-squash correction: a numerically
    # stable equivalent of Eq. 21 in appendix C of the SAC paper
    # (arXiv 1801.01290). Both reductions use the last axis so unbatched
    # inputs work and the two terms always reduce the same dimension.
    logp = pi_distribution.log_prob(pi_action).sum(axis=-1)
    logp -= (2 * (np.log(2) - pi_action - F.softplus(-2 * pi_action))).sum(axis=-1)
    return (action, logp)
def logprob_undercomponent(x, component):
    """Log-density of ``x`` under the Gaussian selected by ``component``.

    Component ``k`` is ``Normal(10 * k, 5)``.

    Args:
        x: Tensor whose first dimension is the batch size ``B``.
        component: Tensor of ``B`` component indices.

    Returns:
        ``log p(x | component)``, evaluated on ``x``'s device.
    """
    B = x.shape[0]
    # Build the parameters on x's device instead of forcing CUDA, so the
    # function also runs on CPU-only machines and with CPU inputs.
    mean = (component.float() * 10.).view(B, 1).to(x.device)
    std = torch.full((B, 1), 5., device=x.device)
    return Normal(mean, std).log_prob(x)
def choose_action(self, observation):
    """Sample a squashed action and return it with its log-prob and value.

    Args:
        observation: A single observation; wrapped into a batch of one.

    Returns:
        ``(action, log_prob, value)``: the action as a NumPy array scaled by
        ``self.max_action``, the summed log-probability as a float, and the
        critic's value as a float.
    """
    device = self.actor.device
    state = T.tensor([observation], dtype=T.float).to(device)
    value = self.critic(state)
    mu, sigma = self.actor(state)
    probabilities = Normal(mu, sigma)
    # sample() has no grad_fn, so the action cannot be backpropagated.
    actions = probabilities.sample()

    squashed = T.tanh(actions)
    # Scale to the environment's range; cast to float so the result is not
    # double when max_action comes from a float64 source.
    action = squashed * T.tensor(self.max_action).to(device).float()

    # Tanh change-of-variables correction. It must use the unscaled tanh
    # output: with the scaled action, 1 - action^2 goes negative whenever
    # max_action > 1 and the log produces NaN.
    log_probs = probabilities.log_prob(actions)
    log_probs = log_probs - T.log(1 - squashed.pow(2) + self.reparam_noise)
    # axis 0: batch, axis 1: action components, summed to one value.
    log_probs = log_probs.sum(1, keepdim=True)

    # .cpu() makes the NumPy conversion work when the actor is on an
    # accelerator device.
    action = T.squeeze(action).detach().cpu().numpy()
    probs = T.squeeze(log_probs).item()
    value = T.squeeze(value).item()
    return action, probs, value
def log_forward(self, x):
    """Sample a tanh-squashed action and its per-dimension log-probability.

    ``x`` is converted to a tensor and reshaped to ``(-1, self.in_dim)``,
    passed through ``l1``-``l4`` (leaky ReLU after the first three), and used
    to parameterise a Gaussian whose log-std is clamped to [-20, 2].

    Returns:
        ``(action, log_p)`` where ``action = tanh(u)`` for the reparameterised
        sample ``u`` and ``log_p`` is the Gaussian log-density of ``u`` minus
        the squash correction, not summed over dimensions.
    """
    hidden = torch.Tensor(x).reshape(-1, self.in_dim)
    for layer in (self.l1, self.l2, self.l3):
        hidden = self.leaky_relu(layer(hidden))
    hidden = self.l4(hidden)

    mu = self.mu_linear(hidden)
    std = torch.exp(torch.clamp(self.log_linear(hidden), -20, 2))

    distribution = Normal(mu, std)
    raw = distribution.rsample()
    correction = 2 * (np.log(2) - raw - F.softplus(-2 * raw))
    log_p = distribution.log_prob(raw) - correction
    return torch.tanh(raw), log_p
def compute_sgd_approx_lr(lr=0.01):
    """Run SGD on ``-pathological_mixture`` and fit a Gaussian to the iterates.

    A scalar is initialised as ``3 * randn`` and optimised for 100 steps.
    The second half of the trajectory gives a mean and a standard error,
    which define a Normal evaluated on the module-level ``test_pts``.

    Returns:
        ``(swa_nll, swa_estimate, swa_std)``.
    """
    # initialize
    x = 3.0 * torch.randn(1, requires_grad=False)
    x.requires_grad = True

    # run sgd, recording the iterate before each update
    sgd = SGD([x], lr=lr)
    num_steps = 100
    trajectory = torch.zeros(num_steps)
    for step in range(num_steps):
        trajectory[step] = x.data
        sgd.zero_grad()
        loss = -pathological_mixture(x)
        loss.backward()
        sgd.step()

    # compute swa distribution from the second half of the trajectory
    half = int(num_steps / 2)
    tail = trajectory[half:]
    swa_estimate = tail.mean()
    swa_std = tail.std() * 1 / math.sqrt(half)

    swa_dist = Normal(swa_estimate, swa_std)
    swa_nll = -swa_dist.log_prob(test_pts)
    return swa_nll, swa_estimate, swa_std
def logprob_undercomponent(x, component):
    """Log-density of ``x`` under the Gaussian selected by ``component``.

    Component ``k`` is ``Normal(10 * k, 5)``.

    Args:
        x: Tensor whose first dimension is the batch size ``B``.
        component: Tensor of ``B`` component indices.

    Returns:
        ``log p(x | component)``, evaluated on ``x``'s device.
    """
    B = x.shape[0]
    # Build the parameters on x's device instead of forcing CUDA, so the
    # function also runs on CPU-only machines and with CPU inputs.
    mean = (component.float() * 10.).view(B, 1).to(x.device)
    std = torch.full((B, 1), 5., device=x.device)
    return Normal(mean, std).log_prob(x)
def learn(self, s, a, td):
    """Take one policy-gradient step for a single transition.

    Args:
        s: NumPy state vector; a leading batch axis is added.
        a: NumPy action that was taken.
        td: TD-error tensor; detached before use.

    Returns:
        The negated loss, i.e. ``log_prob * td + 0.01 * entropy``.
    """
    s = torch.from_numpy(s[np.newaxis, :]).float()
    td_no_grad = td.detach()

    # Shared hidden features are computed once for both heads.
    hidden = self.l1(s)
    mu = torch.squeeze(self.mu(hidden))
    sigma = torch.squeeze(self.sigma(hidden))
    normal_dist = Normal(mu * 2, sigma + 0.1)

    log_prob = normal_dist.log_prob(torch.from_numpy(a))
    # Entropy bonus; negated because the optimizer minimises.
    self.exp_v = -(log_prob * td_no_grad + 0.01 * normal_dist.entropy())

    # Reuse one optimizer across calls: building a fresh Adam every step
    # throws away its running moment estimates.
    optimizer = getattr(self, '_optimizer', None)
    if optimizer is None:
        optimizer = optim.Adam([{
            'params': self.l1.parameters()
        }, {
            'params': self.sigma.parameters()
        }, {
            'params': self.mu.parameters()
        }], lr=self.lr)
        self._optimizer = optimizer
    # Keep honouring later changes to self.lr, as re-creation used to.
    for group in optimizer.param_groups:
        group['lr'] = self.lr

    # optimize the model
    optimizer.zero_grad()
    self.exp_v.backward()
    optimizer.step()
    return -self.exp_v
def gmm_loss(batch, mus, sigmas, logpi, reduce=True): # pylint: disable=too-many-arguments
    """ Computes the gmm loss.

    Compute minus the log probability of batch under the GMM model described
    by mus, sigmas, pi. Precisely, with bs1, bs2, ... the sizes of the batch
    dimensions (several batch dimension are useful when you have both a batch
    axis and a time step axis), gs the number of mixtures and fs the number of
    features.

    :args batch: (bs1, bs2, *, fs) torch tensor
    :args mus: (bs1, bs2, *, gs, fs) torch tensor
    :args sigmas: (bs1, bs2, *, gs, fs) torch tensor
    :args logpi: (bs1, bs2, *, gs) torch tensor
    :args reduce: if not reduce, the mean in the following formula is omitted

    :returns:
    loss(batch) = - mean_{i1=0..bs1, i2=0..bs2, ...} log(
        sum_{k=1..gs} pi[i1, i2, ..., k] * N(
            batch[i1, i2, ..., :] | mus[i1, i2, ..., k, :], sigmas[i1, i2, ..., k, :]))

    NOTE: The loss is not reduced along the feature dimension (i.e. it should scale ~linearily
    with fs).
    """
    batch = batch.unsqueeze(-2)
    normal_dist = Normal(mus, sigmas)
    g_log_probs = normal_dist.log_prob(batch)
    g_log_probs = logpi + torch.sum(g_log_probs, dim=-1)

    # Log-sum-exp over the mixture axis with the max subtracted for stability.
    max_log_probs = torch.max(g_log_probs, dim=-1, keepdim=True)[0]
    g_probs = torch.exp(g_log_probs - max_log_probs)
    probs = torch.sum(g_probs, dim=-1)

    # Squeeze only the mixture axis: a bare squeeze() also drops size-1
    # batch axes, which then broadcast against ``probs`` into the wrong
    # shape when reduce=False.
    log_prob = max_log_probs.squeeze(-1) + torch.log(probs)
    if reduce:
        return -torch.mean(log_prob)
    return -log_prob
def forward(self, inputs, c=None):
    """Sample (or score) the latent ``c`` from the mean embedding of a set.

    ``inputs`` is transposed so that iteration runs over its second axis;
    each slice is encoded with ``self.enc`` and the embeddings are averaged.

    Returns:
        ``(c, score)`` where ``score`` is the log-density of ``c`` summed
        over dim 1. ``c`` is drawn with ``rsample`` when not supplied.
    """
    per_item = inputs.transpose(0, 1)  # |D| * batch * ...
    embeddings = [self.enc(item) for item in per_item]
    mean_embedding = sum(embeddings) / len(embeddings)

    dist = Normal(self.mu_c(mean_embedding), self.sigma_c(mean_embedding))
    if c is None:
        c = dist.rsample()
    return c, dist.log_prob(c).sum(dim=1)  # Return value, score
def forward(self, inputs, c, z=None):
    """Sample (or score) the latent ``z`` from 28x28 single-channel inputs.

    ``inputs`` is reshaped to ``(-1, 1, 28, 28)`` and fed to the
    localization networks that give the Normal's mean and scale. ``c`` is
    accepted but not used here.

    Returns:
        ``(z, score)`` where ``score`` is the log-density of ``z`` reduced
        over dim 1 three times in succession.
    """
    images = inputs.view(-1, 1, 28, 28)
    dist = Normal(self.localization_mu(images), self.localization_sigma(images))
    if z is None:
        z = dist.rsample()
    # Successive dim-1 reductions are kept exactly as in the original.
    score = dist.log_prob(z)
    for _ in range(3):
        score = score.sum(dim=1)
    return z, score
def forward(self, inputs, c=None):
    """Sample (or score) the latent ``c`` from a set of transformed inputs.

    Each slice ``inputs[:, i]`` is passed through ``self.stn`` and
    ``self.conv_post_stn``; the resulting embeddings are averaged and fed
    to ``conv_mu`` / ``conv_sigma``.

    Returns:
        ``(c, score)`` where ``score`` is the log-density of ``c`` reduced
        over dim 1 three times in succession.
    """
    # transform the input
    transformed = [self.stn(inputs[:, i, :, :, :]) for i in range(inputs.size(1))]
    embeddings = [self.conv_post_stn(item) for item in transformed]
    emb = sum(embeddings) / len(embeddings)

    dist = Normal(self.conv_mu(emb), self.conv_sigma(emb))
    if c is None:
        c = dist.rsample()
    score = dist.log_prob(c)
    for _ in range(3):
        score = score.sum(dim=1)
    return c, score
def logprob_givenmixtureeweights(x, needsoftmax_mixtureweight):
    """Log-density of ``x`` under the Gaussian mixture with softmaxed weights.

    Component ``c`` is ``Normal(10 * c, 5)`` for ``c`` in
    ``range(n_components)`` (module-level). The mixture is evaluated in log
    space with logsumexp instead of summing exponentiated densities, so
    points far from every component no longer underflow to ``log(0)``.

    Args:
        x: Tensor of points to evaluate.
        needsoftmax_mixtureweight: Unnormalised mixture logits.

    Returns:
        ``log sum_c softmax(w)[c] * N(x | 10 c, 5)``.
    """
    log_weights = torch.log_softmax(needsoftmax_mixtureweight, dim=0)
    scale = torch.tensor([5.0]).float()
    weighted_log_probs = [
        Normal(torch.tensor([c*10.]).float(), scale).log_prob(x) + log_weights[c]
        for c in range(n_components)
    ]
    return torch.logsumexp(torch.stack(weighted_log_probs, dim=0), dim=0)
def get_log_prob(self, state, squashed_action):
    """Log-probability of an already tanh-squashed action (no gradients).

    The action is mapped back with ``self._atanh``, scored under the
    Gaussian from ``self._get_loc_and_scale_log(state)``, and corrected with
    ``self._squash_correction``.

    Returns:
        Tensor of shape ``(-1, 1)``: per-row sum over dim 1.
    """
    with torch.no_grad():
        loc, scale_log = self._get_loc_and_scale_log(state)
        # This is not getting exported; we can use it
        gaussian = Normal(loc, scale_log.exp())
        raw_action = self._atanh(squashed_action)
        per_dim = (gaussian.log_prob(raw_action)
                   - self._squash_correction(squashed_action))
        return torch.sum(per_dim, dim=1).reshape(-1, 1)
def plot_dist(x=None):
    """Plot the current 20-component mixture with a marker and save it.

    Component ``c`` is ``Normal(10 * c, 5)`` weighted by the softmax of the
    module-level ``needsoftmax_mixtureweight``. The figure is written to
    ``exp_dir + 'gmm_plot_dist.png'``.

    Args:
        x: Optional batch whose first element is marked on the plot; when
            omitted a point is drawn with ``sample_true(1)``.
    """
    if x is None:
        # NOTE(review): this value stays on the GPU and is handed to
        # matplotlib as-is below; confirm that works for sample_true's type.
        x1 = sample_true(1).cuda()
    else:
        x1 = x[0].cpu().numpy()

    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0)

    rows, cols = 1, 1
    fig = plt.figure(figsize=(10+cols, 4+rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-9, 205, 300)
    sum_ = np.zeros(len(xs))
    C = 20
    for c in range(C):
        m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
        ys = [
            (torch.exp(m.log_prob(point)) * mixture_weights[c]).detach().cpu().numpy()
            for point in xs
        ]
        ys = np.reshape(np.array(ys), [-1])
        sum_ += ys
        ax.plot(xs, ys, label='')
    ax.plot(xs, sum_, label='')

    # Short tick marking the position of x1.
    ax.plot([x1, x1+.001], [0., .002])

    plt_path = exp_dir+'gmm_plot_dist.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
def true_posterior(x, needsoftmax_mixtureweight):
    """Posterior over mixture components for the first element of ``x``.

    Component ``c`` is ``Normal(10 * c, 5)`` (built on CUDA) for ``c`` in
    ``range(n_components)`` (module-level), weighted by the softmax of
    ``needsoftmax_mixtureweight``.

    Returns:
        Tensor with one normalised weight per component.
    """
    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0)
    scale = torch.tensor([5.0]).float().cuda()

    joint = []
    for c in range(n_components):
        loc = torch.tensor([c*10.]).float().cuda()
        weighted = torch.exp(Normal(loc, scale).log_prob(x)) * mixture_weights[c]
        # Only the first element is kept for each component.
        joint.append(weighted[0])
    joint = torch.stack(joint)
    return joint / torch.sum(joint)
def logprob_undercomponent(x, component, needsoftmax_mixtureweight, cuda=False):
    """Joint log-probability ``log p(x | z) + log p(z)`` for batched components.

    Component ``k`` is ``Normal(10 * k, 5)`` and ``p(z)`` is the softmax of
    ``needsoftmax_mixtureweight``.

    Args:
        x: Tensor whose first dimension is the batch size ``B``.
        component: Tensor of ``B`` component indices.
        needsoftmax_mixtureweight: Unnormalised mixture logits.
        cuda: If truthy, the Gaussian parameters are moved to CUDA.

    Returns:
        The joint log-probability, with the prior term shaped ``(B, 1)``.
    """
    B = x.shape[0]
    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0)

    mean = (component.float() * 10.).view(B, 1)
    std = (torch.ones([B]) * 5.).view(B, 1)
    if cuda:
        mean, std = mean.cuda(), std.cuda()

    logpx_given_z = Normal(mean, std).log_prob(x)
    logpz = torch.log(mixture_weights[component]).view(B, 1)
    return logpx_given_z + logpz
def logprob_undercomponent(x, component, needsoftmax_mixtureweight, cuda=False):
    """Joint log-probability ``log p(x | z) + log p(z)`` for one component.

    The component is ``Normal(10 * component, 5)`` and ``p(z)`` is the
    softmax of ``needsoftmax_mixtureweight``.

    Args:
        x: Tensor of points to evaluate.
        component: Single component index.
        needsoftmax_mixtureweight: Unnormalised mixture logits.
        cuda: If truthy, the Gaussian parameters are moved to CUDA.
    """
    c = component
    mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0)

    loc = torch.tensor([c*10.]).float()
    scale = torch.tensor([5.0]).float()
    if cuda:
        loc, scale = loc.cuda(), scale.cuda()

    return Normal(loc, scale).log_prob(x) + torch.log(mixture_weights[c])
def gmm_loss(batch, mus, sigmas, logpi, reduce=True): # pylint: disable=too-many-arguments
    """ Computes the gmm loss.

    Compute minus the log probability of batch under the GMM model described
    by mus, sigmas, pi. Precisely, with bs1, bs2, ... the sizes of the batch
    dimensions (several batch dimension are useful when you have both a batch
    axis and a time step axis), gs the number of mixtures and fs the number of
    features.

    :args batch: (bs1, bs2, *, fs) torch tensor
    :args mus: (bs1, bs2, *, gs, fs) torch tensor
    :args sigmas: (bs1, bs2, *, gs, fs) torch tensor
    :args logpi: (bs1, bs2, *, gs) torch tensor
    :args reduce: if not reduce, the mean in the following formula is omitted

    :returns:
    loss(batch) = - mean_{i1=0..bs1, i2=0..bs2, ...} log(
        sum_{k=1..gs} pi[i1, i2, ..., k] * N(
            batch[i1, i2, ..., :] | mus[i1, i2, ..., k, :], sigmas[i1, i2, ..., k, :]))

    NOTE: The loss is not reduced along the feature dimension (i.e. it should scale ~linearily
    with fs).
    """
    batch = batch.unsqueeze(-2)
    normal_dist = Normal(mus, sigmas)
    g_log_probs = normal_dist.log_prob(batch)
    g_log_probs = logpi + torch.sum(g_log_probs, dim=-1)

    # Log-sum-exp over the mixture axis with the max subtracted for stability.
    max_log_probs = torch.max(g_log_probs, dim=-1, keepdim=True)[0]
    g_probs = torch.exp(g_log_probs - max_log_probs)
    probs = torch.sum(g_probs, dim=-1)

    # Squeeze only the mixture axis: a bare squeeze() also drops size-1
    # batch axes, which then broadcast against ``probs`` into the wrong
    # shape when reduce=False.
    log_prob = max_log_probs.squeeze(-1) + torch.log(probs)
    if reduce:
        return - torch.mean(log_prob)
    return - log_prob
def plot_both_dists():
    """Plot the true Gaussian mixture and save it to ``gmm_distplot.png``.

    Component ``c`` is ``Normal(10 * c, 5)`` for ``c`` in
    ``range(n_components)``, weighted by ``true_mixture_weights[c]``; both
    names and ``exp_dir`` are read from module scope. Each weighted component
    and their sum are drawn.
    """
    # MAKE PLOT OF DISTRIBUTION
    rows, cols = 1, 1
    fig = plt.figure(figsize=(10+cols, 4+rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-9, 205, 300)
    sum_ = np.zeros(len(xs))
    for c in range(n_components):
        m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
        ys = [
            (torch.exp(m.log_prob(point)) * true_mixture_weights[c]).numpy()
            for point in xs
        ]
        ys = np.reshape(np.array(ys), [-1])
        sum_ += ys
        ax.plot(xs, ys, label='', c='c')
    ax.plot(xs, sum_, label='')

    plt_path = exp_dir+'gmm_distplot.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
def forward(self, c, z, x=None):
    """Sample (or score) ``x`` given the latents ``c`` and ``z``.

    ``c`` and ``z`` are concatenated along dim 1 and fed to ``self.mu`` and
    ``self.sigma`` to parameterise a Normal.

    Returns:
        ``(x, score)`` where ``score`` is the log-density of ``x`` summed
        over dim 1. ``x`` is drawn with ``rsample`` when not supplied.
    """
    joint = torch.cat([c, z], dim=1)
    dist = Normal(self.mu(joint), self.sigma(joint))
    if x is None:
        x = dist.rsample()
    score = dist.log_prob(x).sum(dim=1)
    return x, score  # Return value, score
def forward(self, inputs, c, z=None):
    """Sample (or score) the latent ``z`` from the first item of each input.

    Only ``inputs[:, 0]`` is used to parameterise the Normal; ``c`` is
    accepted but not used here.

    Returns:
        ``(z, score)`` where ``score`` is the log-density of ``z`` summed
        over dim 1. ``z`` is drawn with ``rsample`` when not supplied.
    """
    first = inputs[:, 0]
    dist = Normal(self.mu_z(first), self.sigma_z(inputs[:, 0]))
    if z is None:
        z = dist.rsample()
    score = dist.log_prob(z).sum(dim=1)
    return z, score  # Return value, score
ax = plt.subplot2grid((rows,cols), (row,col), frameon=False, colspan=1, rowspan=1) xs = np.linspace(-9,205, 300) sum_ = np.zeros(len(xs)) C = 20 for c in range(C): m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float()) # xs = torch.tensor(xs) # print (m.log_prob(lin)) ys = [] for x in xs: # print (m.log_prob(x)) component_i = (torch.exp(m.log_prob(x) )* ((c+5.) / 290.)).numpy() ys.append(component_i) ys = np.reshape(np.array(ys), [-1]) sum_ += ys ax.plot(xs, ys, label='', c='c') ax.plot(xs, sum_, label='') mixture_weights = torch.softmax(needsoftmax_mixtureweight, dim=0) xs = np.linspace(-9,205, 300) sum_ = np.zeros(len(xs)) C = 20 for c in range(C): m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float()) # xs = torch.tensor(xs)