def forward(self, design, target_labels=None):
    """
    Sample the posterior.

    :param torch.Tensor design: tensor of possible designs.
    :param list target_labels: list indicating the sample sites that are
        targets, i.e. for which information gain should be measured.
    """
    if target_labels is None:
        target_labels = list(self.means.keys())

    pyro.module("laplace_guide", self)
    with ExitStack() as stack:
        for plate in iter_plates_to_shape(design.shape[:-2]):
            stack.enter_context(plate)

        if self.training:
            # MAP via Delta guide
            for l in target_labels:
                w_dist = dist.Delta(self.means[l]).to_event(1)
                pyro.sample(l, w_dist)
        else:
            # Laplace approximation via MVN with hessian
            for l in target_labels:
                w_dist = dist.MultivariateNormal(
                    self.means[l], scale_tril=self.scale_trils[l])
                pyro.sample(l, w_dist)
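
# A minimal, self-contained sketch of the Laplace idea behind the guide above.
# Everything here (the toy model, `_laplace_sketch`, the optimiser settings) is
# an illustrative assumption, not part of this module: fit a MAP point by
# gradient descent, then approximate the posterior by a Gaussian whose
# covariance is the inverse Hessian of the negative log-joint at the MAP.
def _laplace_sketch():
    import torch

    def neg_log_joint(w, x, y):
        # Standard normal prior on w; unit-variance Gaussian likelihood.
        prior = torch.distributions.Normal(0., 1.).log_prob(w).sum()
        lik = torch.distributions.Normal(x @ w, 1.).log_prob(y).sum()
        return -(prior + lik)

    x = torch.randn(20, 2)
    y = x @ torch.tensor([1., -0.5]) + 0.1 * torch.randn(20)

    # MAP estimate: plays the role of `self.means` in training mode.
    w_map = torch.zeros(2, requires_grad=True)
    opt = torch.optim.Adam([w_map], lr=0.05)
    for _ in range(500):
        opt.zero_grad()
        neg_log_joint(w_map, x, y).backward()
        opt.step()

    # Cholesky factor of the inverse Hessian: plays the role of
    # `self.scale_trils` in eval mode.
    h = torch.autograd.functional.hessian(
        lambda w: neg_log_joint(w, x, y), w_map.detach())
    scale_tril = torch.linalg.cholesky(torch.inverse(h))
    return torch.distributions.MultivariateNormal(
        w_map.detach(), scale_tril=scale_tril)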
def normal_inv_gamma_family_guide(design, obs_sd, w_sizes, mf=False):
    """Normal inverse Gamma family guide.

    If `obs_sd` is known, this is a multivariate Normal family with separate
    parameters for each batch. `w` is sampled from a Gaussian with mean
    `mw_param` and covariance matrix derived from `obs_sd * lambda_param`,
    and the two parameters `mw_param` and `lambda_param` are learned.

    If `obs_sd=None`, this is a four-parameter family. The observation
    precision `tau` is sampled from a Gamma distribution with parameters
    `alpha`, `beta` (separate for each batch). We let
    `obs_sd = 1./torch.sqrt(tau)` and then proceed as above.

    :param torch.Tensor design: a tensor with last two dimensions `n` and `p`
        corresponding to observations and features respectively.
    :param torch.Tensor obs_sd: observation standard deviation, or `None` to
        use inverse Gamma
    :param OrderedDict w_sizes: map from variable names to torch.Size
    :param bool mf: if `True`, the scale of `w` is learned independently of
        `obs_sd`, i.e. the guide is mean-field across `w` and `tau`.
    """
    # design is size batch x n x p
    # tau is size batch
    tau_shape = design.shape[:-2]
    with ExitStack() as stack:
        for plate in iter_plates_to_shape(tau_shape):
            stack.enter_context(plate)

        if obs_sd is None:
            # First, sample tau (observation precision)
            alpha = softplus(
                pyro.param("invsoftplus_alpha", 20.0 * torch.ones(tau_shape))
            )
            beta = softplus(
                pyro.param("invsoftplus_beta", 20.0 * torch.ones(tau_shape))
            )
            # Global variable
            tau_prior = dist.Gamma(alpha, beta)
            tau = pyro.sample("tau", tau_prior)
            obs_sd = 1.0 / torch.sqrt(tau)

        # response will be shape batch x n
        obs_sd = obs_sd.expand(tau_shape).unsqueeze(-1)

        for name, size in w_sizes.items():
            w_shape = tau_shape + size
            # Set up mu and lambda
            mw_param = pyro.param(
                "{}_guide_mean".format(name), torch.zeros(w_shape))
            scale_tril = pyro.param(
                "{}_guide_scale_tril".format(name),
                torch.eye(*size).expand(tau_shape + size + size),
                constraint=constraints.lower_cholesky,
            )
            # guide distributions for w
            if mf:
                w_dist = dist.MultivariateNormal(mw_param, scale_tril=scale_tril)
            else:
                w_dist = dist.MultivariateNormal(
                    mw_param, scale_tril=obs_sd.unsqueeze(-1) * scale_tril
                )
            pyro.sample(name, w_dist)
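
# A usage sketch for the guide above. The design shape and `w_sizes` are
# illustrative assumptions; tracing the guide shows which sites it samples
# ("tau" only when `obs_sd=None`, plus one site per entry of `w_sizes`) so it
# can be matched against a model with the same sites.
def _normal_inv_gamma_guide_sketch():
    from collections import OrderedDict

    design = torch.randn(3, 10, 2)  # batch=3, n=10 observations, p=2 features
    w_sizes = OrderedDict([("w", torch.Size([2]))])
    trace = pyro.poutine.trace(normal_inv_gamma_family_guide).get_trace(
        design, None, w_sizes)
    print(trace.nodes["tau"]["value"].shape)  # batch: (3,)
    print(trace.nodes["w"]["value"].shape)    # batch x p: (3, 2)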
def model(design):
    batch_shape = design.shape
    with ExitStack() as stack:
        for plate in iter_plates_to_shape(batch_shape):
            stack.enter_context(plate)
        theta = pyro.sample("theta", dist.Bernoulli(0.4).expand(batch_shape))
        y = pyro.sample("y", dist.Bernoulli((design + theta) / 2.0))
        return y
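
# A quick illustration of the toy model above (the design values are
# assumptions for demonstration): `theta` is a latent Bernoulli(0.4) draw and
# the response mixes the design with `theta`, so observing `y` carries
# information about `theta`.
def _toy_model_sketch():
    design = torch.tensor([0.0, 0.5, 1.0])  # designs must lie in [0, 1]
    trace = pyro.poutine.trace(model).get_trace(design)
    print(trace.nodes["theta"]["value"])  # one draw per design
    print(trace.nodes["y"]["value"])      # Bernoulli((design + theta) / 2)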
def bayesian_linear_model(design, w_means={}, w_sqrtlambdas={},
                          re_group_sizes={}, re_alphas={}, re_betas={},
                          obs_sd=None, alpha_0=None, beta_0=None,
                          response="normal", response_label="y", k=None):
    """
    A pyro model for Bayesian linear regression.

    If :param:`response` is `"normal"` this corresponds to a linear regression
    model :math:`Y = Xw + \\epsilon` with :math:`\\epsilon` i.i.d. zero-mean
    Gaussian. The observation standard deviation (:param:`obs_sd`) may be known
    or unknown. If unknown, it is assumed to follow an inverse Gamma
    distribution with parameters :param:`alpha_0` and :param:`beta_0`.

    If the response type is `"bernoulli"` we instead have
    :math:`Y \\sim Bernoulli(p)` with :math:`logit(p) = Xw`.

    Given parameter groups in :param:`w_means` and :param:`w_sqrtlambdas`, the
    fixed effects regression coefficient is taken to be Gaussian with mean
    `w_mean` and standard deviation given by :math:`\\sigma / \\sqrt{\\lambda}`,
    corresponding to the normal inverse Gamma family.

    The random effects coefficient is constructed as follows. For each random
    effect group, precisions for that group are sampled from a Gamma
    distribution and converted to standard deviations via
    :math:`\\sigma = 1 / \\sqrt{\\tau}`. For each group, a random effect
    coefficient is then sampled from a zero-mean Gaussian with those standard
    deviations.

    :param torch.Tensor design: a tensor with last two dimensions `n` and `p`
        corresponding to observations and features respectively.
    :param OrderedDict w_means: map from variable names to tensors of fixed
        effect means.
    :param OrderedDict w_sqrtlambdas: map from variable names to tensors of
        square root :math:`\\lambda` values for fixed effects.
    :param OrderedDict re_group_sizes: map from variable names to int
        representing the group size
    :param OrderedDict re_alphas: map from variable names to `torch.Tensor`,
        the tensor consists of Gamma dist :math:`\\alpha` values
    :param OrderedDict re_betas: map from variable names to `torch.Tensor`,
        the tensor consists of Gamma dist :math:`\\beta` values
    :param torch.Tensor obs_sd: the observation standard deviation (if assumed
        known). This is still relevant in the case of Bernoulli observations
        when coefficients are sampled using `w_sqrtlambdas`.
    :param torch.Tensor alpha_0: Gamma :math:`\\alpha` parameter for unknown
        observation covariance.
    :param torch.Tensor beta_0: Gamma :math:`\\beta` parameter for unknown
        observation covariance.
    :param str response: Emission distribution. May be `"normal"`,
        `"bernoulli"` or `"sigmoid"`.
    :param str response_label: Variable label for response.
    :param torch.Tensor k: Only used for a sigmoid response. The slope of the
        sigmoid transformation.
    """
    # design is size batch x n x p
    # tau is size batch
    batch_shape = design.shape[:-2]
    with ExitStack() as stack:
        for plate in iter_plates_to_shape(batch_shape):
            stack.enter_context(plate)

        if obs_sd is None:
            # First, sample tau (observation precision)
            tau_prior = dist.Gamma(alpha_0.unsqueeze(-1),
                                   beta_0.unsqueeze(-1)).to_event(1)
            tau = pyro.sample("tau", tau_prior)
            obs_sd = 1. / torch.sqrt(tau)

        elif alpha_0 is not None or beta_0 is not None:
            warnings.warn("Values of `alpha_0` and `beta_0` unused because "
                          "`obs_sd` was specified already.")

        obs_sd = obs_sd.expand(batch_shape + (1,))

        # Build the regression coefficient
        w = []
        # Allow different names for different coefficient groups
        # Process fixed effects
        for name, w_sqrtlambda in w_sqrtlambdas.items():
            w_mean = w_means[name]
            # Place a normal prior on the regression coefficient
            w_prior = dist.Normal(w_mean, obs_sd / w_sqrtlambda).to_event(1)
            w.append(pyro.sample(name, w_prior))
        # Process random effects
        for name, group_size in re_group_sizes.items():
            # Sample `G` once for this group
            alpha, beta = re_alphas[name], re_betas[name]
            G_prior = dist.Gamma(alpha, beta).to_event(1)
            G = 1. / torch.sqrt(pyro.sample("G_" + name, G_prior))
            # Repeat `G` for each group
            repeat_shape = tuple(1 for _ in batch_shape) + (group_size,)
            u_prior = dist.Normal(torch.tensor(0.),
                                  G.repeat(repeat_shape)).to_event(1)
            w.append(pyro.sample(name, u_prior))
        # Regression coefficient `w` is batch x p
        w = broadcast_cat(w)

        # Run the regressor forward conditioned on inputs
        prediction_mean = rmv(design, w)
        if response == "normal":
            # y is an n-vector: hence use .to_event(1)
            return pyro.sample(
                response_label,
                dist.Normal(prediction_mean, obs_sd).to_event(1))
        elif response == "bernoulli":
            return pyro.sample(
                response_label,
                dist.Bernoulli(logits=prediction_mean).to_event(1))
        elif response == "sigmoid":
            base_dist = dist.Normal(prediction_mean, obs_sd).to_event(1)
            # You can add loc via the linear model itself
            k = k.expand(prediction_mean.shape)
            transforms = [
                AffineTransform(loc=torch.tensor(0.), scale=k),
                SigmoidTransform()
            ]
            response_dist = dist.TransformedDistribution(base_dist, transforms)
            return pyro.sample(response_label, response_dist)
        else:
            raise ValueError(
                "Unknown response distribution: '{}'".format(response))
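
# A usage sketch with illustrative values (all of them assumptions, not
# defaults of this model): one fixed-effect group "w" with two features and a
# known observation SD. The returned response has shape batch x n.
def _bayesian_linear_model_sketch():
    design = torch.randn(3, 10, 2)  # batch=3, n=10 observations, p=2 features
    y = bayesian_linear_model(
        design,
        w_means={"w": torch.zeros(2)},
        w_sqrtlambdas={"w": torch.ones(2)},
        obs_sd=torch.tensor(1.0),
    )
    print(y.shape)  # (3, 10)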