def log_likelihood(self):
    # Evaluate the likelihood of the labelling (which is, conveniently,
    # just the likelihood of the current mixture model).
    # FIXME: this should really be cached for the last invocation
    score = Counter()
    for c_idx, cluster in self._cluster_to_datum.items():
        if not cluster:
            continue
        # Evaluate the likelihood of each individual cluster
        cluster_size = len(cluster)
        # The mean of the data points belonging to this cluster
        cluster_datum_mean = sum(cluster) / cluster_size

        # p(c)
        score += Gaussian.log_prob(cluster_datum_mean,
                                   self._prior_mean,
                                   self._prior_precision)
        # p(x|c)
        score += sum(Gaussian.log_prob(datum,
                                       cluster_datum_mean,
                                       self._cluster_precision)
                     for datum in cluster)
    # score => p(x, c)
    # The Gaussian dimensions are independent, so the per-dimension
    # log-probabilities can simply be summed.
    return score.total_count()
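The quantity accumulated above is the joint log-probability log p(x, c) = sum over clusters k of [log N(mu_k | mu_0, tau_0) + sum over points i in k of log N(x_i | mu_k, tau)], where mu_k is the empirical mean of cluster k. For readers without the custom Counter and Gaussian helpers, here is a minimal NumPy sketch of the same computation under the assumption of diagonal Gaussians parameterized by precision; the function names and the plain-float accumulator are illustrative, not from the source:

import numpy as np

def gaussian_log_prob(x, mean, precision):
    # Log density of a diagonal Gaussian parameterized by precision (1/variance),
    # summed over dimensions (the dimensions are independent).
    return 0.5 * np.sum(np.log(precision / (2 * np.pi))
                        - precision * (x - mean) ** 2)

def joint_log_likelihood(cluster_to_data, prior_mean, prior_precision,
                         cluster_precision):
    score = 0.0
    for data in cluster_to_data.values():
        if len(data) == 0:
            continue
        cluster_mean = np.mean(data, axis=0)
        # p(c): the cluster mean under the prior
        score += gaussian_log_prob(cluster_mean, prior_mean, prior_precision)
        # p(x|c): each datum under its cluster's Gaussian
        score += sum(gaussian_log_prob(x, cluster_mean, cluster_precision)
                     for x in data)
    return score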
import torch
import torch.nn as nn
import torch.nn.functional as F


class BLinear(nn.Module):
    """Bayesian Linear layer; the default prior is a Gaussian for both weights and bias."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features

        # Weight parameters
        self.weight_mu = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(-0.2, 0.2))
        self.weight_rho = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(1e-1, 2))
        # Variational posterior for the weights
        self.weight = Gaussian(self.weight_mu, self.weight_rho)

        # Bias parameters
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_features).uniform_(-0.2, 0.2))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_features).uniform_(1e-1, 2))
        # Variational posterior for the bias
        self.bias = Gaussian(self.bias_mu, self.bias_rho)

        # Prior distributions
        self.weight_prior = Gaussian(torch.Tensor([0.]), torch.Tensor([1.]))
        self.bias_prior = Gaussian(torch.Tensor([0.]), torch.Tensor([1.]))

        # Initialize log_prior and log_variational_posterior to 0
        self.log_prior = 0
        self.log_variational_posterior = 0

    def forward(self, input, sample=False, calculate_log_probs=False):
        # 1. Sample weights and bias from the variational posterior;
        #    at eval time, fall back to the posterior means unless sampling is forced
        if self.training or sample:
            weight = self.weight.sample()
            bias = self.bias.sample()
        else:
            weight = self.weight.mu
            bias = self.bias.mu

        # 2. Update log_prior and log_variational_posterior for the current sample
        if self.training or calculate_log_probs:
            self.log_prior = self.weight_prior.log_prob(
                weight) + self.bias_prior.log_prob(bias)
            self.log_variational_posterior = self.weight.log_prob(
                weight) + self.bias.log_prob(bias)
        else:
            self.log_prior, self.log_variational_posterior = 0, 0

        # 3. Do a forward pass through the layer
        return F.linear(input, weight, bias)
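BLinear assumes a Gaussian helper class exposing .mu, .sample(), and .log_prob(). Here is a minimal sketch consistent with that usage, using the common Bayes-by-Backprop parameterization sigma = log(1 + exp(rho)) to keep the scale positive. This parameterization is an assumption; note that under it the priors above, constructed with rho = 1, have sigma ≈ 1.31 rather than exactly 1, so the source may parameterize the prior directly by sigma instead:

import math
import torch

class Gaussian:
    """Diagonal Gaussian with a softplus-parameterized scale (assumed helper)."""

    def __init__(self, mu, rho):
        self.mu = mu
        self.rho = rho

    @property
    def sigma(self):
        # Softplus keeps the standard deviation positive
        return torch.log1p(torch.exp(self.rho))

    def sample(self):
        # Reparameterization trick: mu + sigma * eps, with eps ~ N(0, 1),
        # so gradients flow through mu and rho
        epsilon = torch.randn_like(self.mu)
        return self.mu + self.sigma * epsilon

    def log_prob(self, value):
        # Sum of elementwise Gaussian log densities
        return (-math.log(math.sqrt(2 * math.pi))
                - torch.log(self.sigma)
                - ((value - self.mu) ** 2) / (2 * self.sigma ** 2)).sum()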
def elbo(self, input, target, samples=20):
    outputs = []
    log_priors = torch.zeros(samples)
    log_variational_posteriors = torch.zeros(samples)

    # Draw `samples` Monte Carlo samples from the posterior
    # (i.e. run `samples` stochastic forward passes)
    for i in range(samples):
        outputs.append(self(input, sample=True))
        log_priors[i] = self.log_prior()
        log_variational_posteriors[i] = self.log_variational_posterior()

    log_prior = log_priors.sum()
    log_variational_posterior = log_variational_posteriors.sum()

    outputs = torch.stack(outputs)
    y_dist = Gaussian(outputs.mean(0), self.noise)
    # negative_log_likelihood = self.loss_function(
    #     outputs.mean(0), target, reduction='sum')
    negative_log_likelihood = -y_dist.log_prob(target) / len(input)

    # loss = NLL + KL(q || p), with the KL scaled down by the number
    # of training points
    loss = negative_log_likelihood
    kl = (log_variational_posterior - log_prior) / self.n_training
    loss += kl
    return loss, log_prior, log_variational_posterior, negative_log_likelihood
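elbo() calls self.log_prior() and self.log_variational_posterior() as methods, whereas each BLinear layer stores them as attributes set during forward(). A plausible aggregation (assumed, not from the source) sums the per-layer values, and a hypothetical training loop would then backpropagate through the sampled loss; `net` and `train_loader` below are illustrative names, and `net` is assumed to also define the `noise` and `n_training` attributes that elbo() uses:

def log_prior(self):
    # Sum the per-layer log priors recorded during the last forward pass
    return sum(m.log_prior for m in self.modules() if isinstance(m, BLinear))

def log_variational_posterior(self):
    return sum(m.log_variational_posterior
               for m in self.modules() if isinstance(m, BLinear))

# Hypothetical training loop
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
for x, y in train_loader:
    optimizer.zero_grad()
    loss, log_prior, log_post, nll = net.elbo(x, y, samples=20)
    loss.backward()
    optimizer.step()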
def _cluster_log_probs(self, cluster, cluster_size, cluster_mean,
                       cluster_covariance, new_point):
    """
    Return the posterior, prior, and likelihood of new_point belonging to
    the cluster of size cluster_size centered at cluster_mean.
    """
    # The updated cluster mean after adding new_point
    new_mean = (cluster_mean * cluster_size + new_point) / (cluster_size + 1)

    posterior_precision = self._prior_precision + self._cluster_precision
    # Convex combination of the prior mean and the cluster mean,
    # weighted by their respective precisions
    posterior_mean = self._prior_mean * self._prior_precision
    posterior_mean += cluster_mean * self._cluster_precision
    posterior_mean /= posterior_precision
    posterior = Gaussian.log_prob(new_mean, posterior_mean, posterior_precision)

    # The prior is keyed on the (potentially) updated params
    prior = Gaussian.log_prob(new_mean, self._prior_mean, self._prior_precision)
    likelihood = Gaussian.log_prob(new_point, new_mean, self._cluster_precision)
    return posterior, prior, likelihood
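For reference, the textbook conjugate update for a Gaussian mean with known precision weights the data term by the number of observations; the method above effectively applies the single-observation (n = 1) form of this update. A sketch of the general case (the function name is illustrative):

def conjugate_posterior(prior_mean, prior_precision, cluster_precision,
                        cluster_mean, cluster_size):
    # Posterior over the cluster mean after observing cluster_size points:
    #   tau_post = tau_0 + n * tau
    #   mu_post  = (tau_0 * mu_0 + n * tau * xbar) / tau_post
    posterior_precision = prior_precision + cluster_size * cluster_precision
    posterior_mean = (prior_precision * prior_mean
                      + cluster_size * cluster_precision * cluster_mean)
    posterior_mean /= posterior_precision
    return posterior_mean, posterior_precision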