def standardizing_transform(
    batch_t: Tensor,
    structured_dims: bool = False,
    min_std: float = 1e-14,
) -> transforms.AffineTransform:
    """Builds standardizing transform.

    Args:
        batch_t: Batched tensor from which mean and std deviation (across first
            dimension) are computed.
        structured_dims: Whether the data dimensions are structured (e.g.,
            time series, images). If True, mean and std are computed per sample
            first and then aggregated over samples into a single
            standardization mean and std for the batch. If False (default),
            each dimension is z-scored independently.
        min_std: Minimum value of the standard deviation to use when z-scoring
            to avoid division by zero.

    Returns:
        Affine transform for z-scoring.
    """
    is_valid_t, *_ = handle_invalid_x(batch_t, True)

    if structured_dims:
        # Structured data: compute a single mean over all dimensions,
        # equivalent to taking the mean over per-sample means, i.e.,
        # `torch.mean(torch.mean(.., dim=1))`.
        t_mean = torch.mean(batch_t[is_valid_t])
        # Compute the std per sample first.
        sample_std = torch.std(batch_t[is_valid_t], dim=1)
        sample_std[sample_std < min_std] = min_std
        # Average over all samples for the batch std.
        t_std = torch.mean(sample_std)
    else:
        t_mean = torch.mean(batch_t[is_valid_t], dim=0)
        t_std = torch.std(batch_t[is_valid_t], dim=0)
        t_std[t_std < min_std] = min_std

    return transforms.AffineTransform(shift=-t_mean / t_std, scale=1 / t_std)
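
# Usage sketch (illustrative, not part of the original module):
# `standardizing_transform` builds an affine z-scoring transform from a batch.
# The call convention below assumes `transforms` is the nflows transforms
# module, whose transforms return an `(outputs, logabsdet)` tuple; the
# synthetic batch is made up for the example.
def _example_standardizing_transform() -> None:
    # Batch of 1000 samples of a 3-dimensional parameter with very different
    # scales per dimension.
    theta = torch.randn(1000, 3) * torch.tensor([1.0, 5.0, 0.1]) + torch.tensor(
        [0.0, 2.0, -1.0]
    )

    t = standardizing_transform(theta)  # independent per-dimension z-scoring
    theta_z, _ = t(theta)  # nflows transforms return (outputs, logabsdet)

    print(theta_z.mean(dim=0))  # approximately zero in every dimension
    print(theta_z.std(dim=0))  # approximately one in every dimension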
def posterior_nn(
    model: str,
    prior: torch.distributions.Distribution,
    context: torch.Tensor,
    embedding: Optional[torch.nn.Module] = None,
    hidden_features: int = 50,
    mdn_num_components: int = 20,
    made_num_mixture_components: int = 10,
    made_num_blocks: int = 4,
    flow_num_transforms: int = 5,
) -> torch.nn.Module:
    """Neural posterior density estimator.

    Args:
        model: Model, one of maf / mdn / made / nsf.
        prior: Prior distribution.
        context: Observation.
        embedding: Embedding network.
        hidden_features: For all, number of hidden features.
        mdn_num_components: For MDNs only, number of components.
        made_num_mixture_components: For MADEs only, number of mixture components.
        made_num_blocks: For MADEs only, number of blocks.
        flow_num_transforms: For flows only, number of transforms.

    Returns:
        Neural network.
    """
    # z-score parameters with respect to the prior mean and std.
    mean, std = (prior.mean, prior.stddev)
    standardizing_transform = transforms.AffineTransform(
        shift=-mean / std, scale=1 / std
    )

    parameter_dim = prior.sample([1]).shape[1]

    context = utils.torchutils.atleast_2d(context)
    # Number of observation features as a plain int, as required by nn.Linear
    # and the `context_features` arguments below.
    observation_dim = torch.tensor([context.shape[1:]]).item()

    if model == "mdn":
        # Mixture-density network: a feedforward net predicts the parameters
        # of a multivariate Gaussian mixture conditioned on the observation.
        neural_net = MultivariateGaussianMDN(
            features=parameter_dim,
            context_features=observation_dim,
            hidden_features=hidden_features,
            hidden_net=nn.Sequential(
                nn.Linear(observation_dim, hidden_features),
                nn.ReLU(),
                nn.Dropout(p=0.0),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
            ),
            num_components=mdn_num_components,
            custom_initialization=True,
        )

    elif model == "made":
        # Masked autoregressive density estimator with a mixture-of-Gaussians
        # base distribution; only the standardizing transform is applied.
        transform = standardizing_transform
        distribution = distributions_.MADEMoG(
            features=parameter_dim,
            hidden_features=hidden_features,
            context_features=observation_dim,
            num_blocks=made_num_blocks,
            num_mixture_components=made_num_mixture_components,
            use_residual_blocks=True,
            random_mask=False,
            activation=torch.relu,
            dropout_probability=0.0,
            use_batch_norm=False,
            custom_initialization=True,
        )
        neural_net = flows.Flow(transform, distribution, embedding)

    elif model == "maf":
        # Masked autoregressive flow: stacked affine autoregressive transforms,
        # each followed by a random permutation of the parameter dimensions.
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.MaskedAffineAutoregressiveTransform(
                            features=parameter_dim,
                            hidden_features=hidden_features,
                            context_features=observation_dim,
                            num_blocks=2,
                            use_residual_blocks=False,
                            random_mask=False,
                            activation=torch.tanh,
                            dropout_probability=0.0,
                            use_batch_norm=True,
                        ),
                        transforms.RandomPermutation(features=parameter_dim),
                    ]
                )
                for _ in range(flow_num_transforms)
            ]
        )

        transform = transforms.CompositeTransform(
            [standardizing_transform, transform]
        )

        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)

    elif model == "nsf":
        # Neural spline flow: rational-quadratic coupling transforms with
        # alternating binary masks, interleaved with LU-decomposed linear layers.
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.PiecewiseRationalQuadraticCouplingTransform(
                            mask=create_alternating_binary_mask(
                                features=parameter_dim, even=(i % 2 == 0)
                            ),
                            transform_net_create_fn=lambda in_features, out_features: nets.ResidualNet(
                                in_features=in_features,
                                out_features=out_features,
                                hidden_features=hidden_features,
                                context_features=observation_dim,
                                num_blocks=2,
                                activation=torch.relu,
                                dropout_probability=0.0,
                                use_batch_norm=False,
                            ),
                            num_bins=10,
                            tails="linear",
                            tail_bound=3.0,
                            apply_unconditional_transform=False,
                        ),
                        transforms.LULinear(parameter_dim, identity_init=True),
                    ]
                )
                for i in range(flow_num_transforms)
            ]
        )

        transform = transforms.CompositeTransform(
            [standardizing_transform, transform]
        )

        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)

    else:
        raise ValueError(
            f"Model '{model}' not supported, use one of maf / mdn / made / nsf."
        )

    return neural_net
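
# Usage sketch (illustrative, not part of the original module): build a
# conditional density estimator with `posterior_nn` and evaluate its
# log-probability. Assumes the module-level imports (torch, nflows-style
# `transforms`/`flows`, sbi `utils`, etc.) are in place; the Gaussian prior
# and observation below are made up for the example.
def _example_posterior_nn() -> None:
    prior = torch.distributions.MultivariateNormal(torch.zeros(3), torch.eye(3))
    x_o = torch.zeros(1, 5)  # a single 5-dimensional observation

    # Masked autoregressive flow sized from the prior and observation shapes.
    net = posterior_nn(model="maf", prior=prior, context=x_o)

    # The returned nflows Flow evaluates log q(theta | x); here for 10
    # parameter sets, all conditioned on the same observation.
    theta = prior.sample((10,))
    log_prob = net.log_prob(theta, context=x_o.repeat(10, 1))
    print(log_prob.shape)  # torch.Size([10])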