def distribution(self, distr_args, scale: Optional[torch.Tensor] = None) -> Distribution:
    # Unpack into a distinct name so the optional `scale` argument is not shadowed
    # by the per-component scale coming from `distr_args`.
    mix_logits, loc, dist_scale = distr_args
    comp_distr = Normal(loc, dist_scale)
    if scale is None:
        return MixtureSameFamily(Categorical(logits=mix_logits), comp_distr)
    else:
        scaled_comp_distr = TransformedDistribution(
            comp_distr, [AffineTransform(loc=0, scale=scale)])
        return MixtureSameFamily(Categorical(logits=mix_logits), scaled_comp_distr)
def distribution(self, distr_args, scale: Optional[torch.Tensor] = None) -> Distribution:
    mix_logits, loc, dist_scale = distr_args
    distr = MixtureSameFamily(Categorical(logits=mix_logits), Normal(loc, dist_scale))
    if scale is None:
        return distr
    else:
        return TransformedDistribution(
            distr, [AffineTransform(loc=0, scale=scale)])
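# A minimal, self-contained sketch of the optional-rescaling pattern used by the two
# `distribution` variants above. The shapes and tensors below (batch, n_components,
# mix_logits, loc, dist_scale, scale) are illustrative assumptions, not the original
# model's `distr_args`.
import torch
from torch.distributions import (AffineTransform, Categorical, MixtureSameFamily,
                                 Normal, TransformedDistribution)

batch, n_components = 4, 3
mix_logits = torch.randn(batch, n_components)
loc = torch.randn(batch, n_components)
dist_scale = torch.rand(batch, n_components) + 0.1

mixture = MixtureSameFamily(Categorical(logits=mix_logits), Normal(loc, dist_scale))

# Optional per-sample rescaling (e.g. to undo data normalization) via an affine transform.
scale = torch.rand(batch) + 0.5
scaled = TransformedDistribution(mixture, [AffineTransform(loc=0.0, scale=scale)])

x = scaled.sample()         # shape: (batch,)
log_p = scaled.log_prob(x)  # shape: (batch,)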
def forward(self, x):
    norm_params = self.norm_network(x)
    # Split so we get parameters for mean and standard deviation
    mean, std = torch.split(norm_params, norm_params.shape[1] // 2, dim=1)
    # We need rightmost dimension to be n_components for mixture
    mean = mean.view(mean.shape[0], -1, self.n_components)
    std = std.view(std.shape[0], -1, self.n_components)
    normal = Normal(mean, torch.exp(std))
    cat_params = self.cat_network(x)
    # Again, rightmost dimension must be n_components
    cat = Categorical(
        logits=cat_params.view(cat_params.shape[0], -1, self.n_components))
    return MixtureSameFamily(cat, normal)
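# A self-contained sketch of the same mixture-density-network head, with hypothetical
# nn.Linear layers standing in for `self.norm_network` and `self.cat_network` (the
# class name MDNHead and all sizes below are assumptions for illustration).
import torch
from torch import nn
from torch.distributions import Categorical, MixtureSameFamily, Normal


class MDNHead(nn.Module):
    def __init__(self, dim_in: int, dim_out: int, n_components: int):
        super().__init__()
        self.n_components = n_components
        self.norm_network = nn.Linear(dim_in, 2 * dim_out * n_components)  # means and log-stds
        self.cat_network = nn.Linear(dim_in, dim_out * n_components)       # mixture logits

    def forward(self, x):
        norm_params = self.norm_network(x)
        mean, log_std = torch.split(norm_params, norm_params.shape[1] // 2, dim=1)
        mean = mean.view(mean.shape[0], -1, self.n_components)
        log_std = log_std.view(log_std.shape[0], -1, self.n_components)
        cat_logits = self.cat_network(x).view(x.shape[0], -1, self.n_components)
        return MixtureSameFamily(Categorical(logits=cat_logits),
                                 Normal(mean, torch.exp(log_std)))


head = MDNHead(dim_in=10, dim_out=2, n_components=5)
dist = head(torch.randn(8, 10))
print(dist.sample().shape)  # (8, 2): one mixture per output dimension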
def forward(self, x: Tensor) -> Distribution:
    n = self.num_gaussians
    s = self.out_size
    # TODO logvars vs logstds
    mixture_coeff_logits, means, logvars = torch.split(
        self.conv(x).permute(0, 2, 3, 1), (n, n * s, n * s), dim=-1)
    stds = torch.exp(0.5 * logvars)
    means = means.view(*means.shape[:-1], n, s)
    stds = stds.view(*stds.shape[:-1], n, s)
    mixture = Categorical(logits=mixture_coeff_logits)
    component = Independent(Normal(means, stds), 1)  # TODO Independent?
    mixture_model = Independent(MixtureSameFamily(mixture, component), 2)
    return mixture_model
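# A shape-check sketch of the Independent/MixtureSameFamily nesting above, under
# assumed sizes (batch, height, width, n = num_gaussians, s = out_size are made up):
import torch
from torch.distributions import Categorical, Independent, MixtureSameFamily, Normal

batch, height, width = 2, 5, 5
n, s = 4, 3

mixture_coeff_logits = torch.randn(batch, height, width, n)
means = torch.randn(batch, height, width, n, s)
stds = torch.rand(batch, height, width, n, s) + 0.1

mixture = Categorical(logits=mixture_coeff_logits)
component = Independent(Normal(means, stds), 1)    # event: (s,)
per_pixel = MixtureSameFamily(mixture, component)  # batch: (batch, H, W), event: (s,)
joint = Independent(per_pixel, 2)                  # batch: (batch,), event: (H, W, s)

x = joint.sample()
print(x.shape, joint.log_prob(x).shape)  # (2, 5, 5, 3) (2,)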
def prior(self):
    """
    Get the prior distribution p(z).

    Returns
    -------
    p(z): @callable
        the distribution p(z) with shape (batch_size, latent_size).
    """
    if self.n_mix_components > 1:
        mix = Categorical(probs=self.mixture_probs)
        comp = Independent(Normal(loc=self.loc, scale=func.softplus(self.raw_scale)),
                           reinterpreted_batch_ndims=1)
        return MixtureSameFamily(mixture_distribution=mix,
                                 component_distribution=comp)
    else:
        return self._prior
def __call__(self, X, *args, **kwargs):
    if self.training:
        # at train time we need to ensure that each model only receives its own training inputs
        return [model(*model.train_inputs) for model in self.models]
    else:
        # now we compute weights and scale the posteriors by the weights,
        # returning a mixture distribution
        weights = self._construct_weights(X)[..., 0].transpose(-1, -2).clamp_min(1e-4)
        weight_distribution = Categorical(weights)
        posterior_list = [m.likelihood(m(X)) for m in self.models]
        stacked_means = torch.stack([p.mean for p in posterior_list]).transpose(-1, -2)
        stacked_covar_diags = torch.stack(
            [p.covariance_matrix.diag() for p in posterior_list]).transpose(-1, -2)
        stacked_dist = Normal(stacked_means, stacked_covar_diags)
        return MixtureSameFamily(weight_distribution, stacked_dist)
def conditional_distribution(
        self,
        context: Union[np.ndarray, Tensor] = None,
        data_normalization=True,
        context_one_hot_encoding=True) -> torch.distributions.Distribution:
    _, context, _ = self._preprocess_data(None, context, data_normalization,
                                          context_one_hot_encoding, False)
    pi, normal = self.pi_network(context), self.normal_network(context)
    mixture = RandomVariable(
        MixtureSameFamily(pi._categorical, Independent(normal, 1)))
    if self.inputs_normalization:
        mixture, _ = self._transform_inverse_normalise(mixture, None)
    return mixture.dist
def get_distribution_of_true_labels(self, labels, p_Q_given_x, distr_params):
    """
    For each expert i, returns the probability associated to a specific label.
    """
    eps = 1e-12
    if 'binomial' in self.output_type:
        n, p = distr_params
        # Assume 1 for now
        if len(n.shape) == 2:
            n = n.unsqueeze(2)  # add output feature dim
        # distr_params is now [samples, no_experts, 1=no_features]
        if len(p.shape) == 2:
            p = p.unsqueeze(2)  # add output feature dim
        mix = Categorical(p_Q_given_x)
        comp = Independent(Binomial(n, p), 1)
        pmm = MixtureSameFamily(mix, comp)
        if len(labels.shape) == 1:
            labels = labels.unsqueeze(1)  # add output feature dim
        x = pmm._pad(labels)
        emission_of_true_labels = pmm.component_distribution.log_prob(
            x.float()).exp() + eps  # [samples, experts]
    elif 'gaussian' in self.output_type:
        mu, var = distr_params
        mix = Categorical(p_Q_given_x)
        # mu/var have shape [samples, experts, features]
        comp = Independent(Normal(loc=mu, scale=var), 1)
        gmm = MixtureSameFamily(mix, comp)
        # labels has shape [samples, features]
        x = gmm._pad(labels)
        emission_of_true_labels = gmm.component_distribution.log_prob(
            x).exp()  # [samples, experts], one prob for each expert
    return emission_of_true_labels
def variant_b(tens, b):
    return MixtureSameFamily(Categorical(probs=tens[:1]),
                             Normal(loc=tens[1:2], scale=tens[2:3])).cdf(b)
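# For reference, a tiny numeric sketch (made-up values) of the two-component mixture
# CDF that `variant_b` evaluates. MixtureSameFamily.cdf delegates to the component's
# cdf, so it is only usable with univariate components such as Normal here.
import torch
from torch.distributions import Categorical, MixtureSameFamily, Normal

mix = Categorical(probs=torch.tensor([0.3, 0.7]))
comp = Normal(loc=torch.tensor([-1.0, 2.0]), scale=torch.tensor([0.5, 1.5]))
gmm = MixtureSameFamily(mix, comp)

print(gmm.cdf(torch.tensor(0.0)))  # probability-weighted sum of the component CDFs at 0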
def ScaleMixtureNormal(pi, sigma1, sigma2):
    mixture = Categorical(torch.tensor([pi, 1 - pi], device=pi.device))
    components = Normal(0.0, torch.tensor([sigma1, sigma2], device=pi.device))
    return MixtureSameFamily(mixture, components)
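# A short usage sketch of ScaleMixtureNormal as a weight prior. The hyperparameter
# values and the weight tensor below are hypothetical, chosen in the spirit of a
# Bayes-by-Backprop-style scale-mixture prior.
import torch

pi = torch.tensor(0.5)
prior = ScaleMixtureNormal(pi, sigma1=1.0, sigma2=0.0025)

w = torch.randn(64, 32) * 0.1        # e.g. a sampled weight matrix
log_prior = prior.log_prob(w).sum()  # scalar prior term for the ELBO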
def dist(probs, scales, locs):
    return MixtureSameFamily(Categorical(probs=probs),
                             Normal(loc=locs, scale=scales))
def likelihood(self, theta: torch.Tensor, eps: float = 1e-8) -> Distribution:
    r""" p(x | theta) """
    # Rotation matrix
    alpha = theta[..., 0].sigmoid().asin()
    beta = theta[..., 1].sigmoid().acos()
    gamma = theta[..., 2].tanh().atan()

    zero = torch.zeros_like(alpha)
    one = torch.ones_like(alpha)

    Rz = stack2d([
        [alpha.cos(), -alpha.sin(), zero],
        [alpha.sin(), alpha.cos(), zero],
        [zero, zero, one],
    ])
    Ry = stack2d([
        [beta.cos(), zero, beta.sin()],
        [zero, one, zero],
        [-beta.sin(), zero, beta.cos()],
    ])
    Rx = stack2d([
        [one, zero, zero],
        [zero, gamma.cos(), -gamma.sin()],
        [zero, gamma.sin(), gamma.cos()],
    ])

    R = (Rz @ Ry @ Rx)[..., :2, :2]

    # Mean
    d = theta[..., 3]**2 + 1.
    mu = d[..., None, None] * R

    # Covariance
    s1 = theta[..., 4]**2 + eps
    s2 = theta[..., 5]**2 + eps
    rho = theta[..., 6].tanh()

    cov1 = stack2d([
        [s1**2, rho * s1 * s2],
        [rho * s1 * s2, s2**2],
    ])
    cov2 = stack2d([
        [1. / (s1 + 1.), zero],
        [zero, 1. / (s2 + 1.)],
    ])
    cov = torch.stack([cov1, cov2], dim=-3)

    # Mixture
    p = theta[..., 7].sigmoid()
    mix = torch.stack([p, 1. - p], dim=-1)

    # Repeat
    mix = mix.unsqueeze(-2).repeat_interleave(8, -2)
    mu = mu.unsqueeze(-3).repeat_interleave(8, -3)
    cov = cov.unsqueeze(-4).repeat_interleave(8, -4)

    # Normal
    normal = MixtureSameFamily(
        Categorical(mix),
        MultivariateNormal(mu, cov),
    )

    return Independent(normal, 1)
class BayesianLinear(nn.Module):

    def __init__(
        self,
        dim_in: int,
        dim_out: int,
        posterior_rho_init: float,
        prior_type: str,
        prior_sigma: float = None,
        prior_pi: float = None,
        prior_sigma_1: float = None,
        prior_sigma_2: float = None,
    ):
        """
        Assume a gaussian prior w_i ~ Normal(0, prior_sigma), with w_i iid

        Args:
            dim_in:
            dim_out:
            prior_sigma: std
        """
        super().__init__()

        # Parameters theta=[mu,rho] of variational posterior q(w|theta): for weights...
        # Variational weight parameters and sample
        self.mu = nn.Parameter(torch.Tensor(dim_out, dim_in).normal_(0.0, 1.0))
        self.rho = nn.Parameter(torch.ones(dim_out, dim_in) * posterior_rho_init)
        # ... and biases
        self.mu_bias = nn.Parameter(torch.Tensor(dim_out).normal_(0.0, 1.0))
        self.rho_bias = nn.Parameter(torch.ones(dim_out) * posterior_rho_init)

        # Prior
        if prior_type == "normal":
            self.prior = Normal(0, prior_sigma)
        elif prior_type == "mixture":
            # Gaussian mixture prior (like Bayes By Backprop paper)
            mix = Categorical(torch.Tensor([prior_pi, 1 - prior_pi]))
            comp = Normal(torch.zeros(2), torch.Tensor([prior_sigma_1, prior_sigma_2]))
            self.prior = MixtureSameFamily(mix, comp)
        # todo: add prior that can be optimized to match GP

        self.log_variational_posterior = 0.0
        self.log_prior = 0.0

    def forward(self, x, sample_from_prior: bool):
        if sample_from_prior:
            w = self.prior.sample(self.mu.shape)
            b = self.prior.sample(self.mu_bias.shape)
            return F.linear(x, w, b)

        # Sample the weights and forward it
        # perform all operations in the forward rather than in __init__ (including log[1+exp(rho)])
        variational_posterior = Normal(self.mu, torch.log1p(torch.exp(self.rho)))
        variational_posterior_bias = Normal(self.mu_bias,
                                            torch.log1p(torch.exp(self.rho_bias)))
        w = variational_posterior.rsample()
        b = variational_posterior_bias.rsample()

        # Get the log prob
        self.log_variational_posterior = variational_posterior.log_prob(w).sum() + \
            variational_posterior_bias.log_prob(b).sum()
        self.log_prior = self.prior.log_prob(w).sum() + self.prior.log_prob(b).sum()

        return F.linear(x, w, b)
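# A hypothetical usage sketch of BayesianLinear; the constructor arguments and sizes
# below are illustrative assumptions, not values from the original repository.
import torch

layer = BayesianLinear(
    dim_in=16, dim_out=8,
    posterior_rho_init=-3.0,
    prior_type="mixture",
    prior_pi=0.5, prior_sigma_1=1.0, prior_sigma_2=0.0025,
)

x = torch.randn(4, 16)
y = layer(x, sample_from_prior=False)  # one stochastic forward pass, shape (4, 8)

# Single-sample estimate of this layer's contribution to the KL term of the ELBO.
kl = layer.log_variational_posterior - layer.log_prior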
def dist(self):
    mix = Categorical(logits=self.logit_pi)
    comp = Independent(Normal(self.loc, self.log_scale.exp()), 1)
    return MixtureSameFamily(mix, comp)
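# A quick standalone check of the diagonal-Gaussian mixture returned by `dist` above,
# with hypothetical k, d and parameters standing in for self.logit_pi, self.loc, and
# self.log_scale. Wrapping the Normal in Independent(..., 1) makes the feature
# dimension part of the event shape, so log_prob returns one value per sample.
import torch
from torch.distributions import Categorical, Independent, MixtureSameFamily, Normal

k, d = 3, 4  # number of components, feature dimension
logit_pi = torch.zeros(k)
loc = torch.randn(k, d)
log_scale = torch.zeros(k, d)

mix = Categorical(logits=logit_pi)
comp = Independent(Normal(loc, log_scale.exp()), 1)  # batch: (k,), event: (d,)
gmm = MixtureSameFamily(mix, comp)                   # batch: (), event: (d,)

x = torch.randn(10, d)
print(gmm.log_prob(x).shape)  # (10,)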