def _adapted_sampling(
    shape: Union[Tuple, torch.Size],
    dist: torch.distributions.Distribution,
    same_on_batch: bool = False,
) -> torch.Tensor:
    r"""Sampling function that accepts ``same_on_batch``.

    If ``same_on_batch`` is True, all values generated will be exactly the same
    across the batch dimension (``shape[0]``). By default, ``same_on_batch`` is False.
    """
    if same_on_batch:
        # Draw a single batch element and repeat it along the batch dimension.
        return dist.sample((1, *shape[1:])).repeat(shape[0], *[1] * (len(shape) - 1))
    return dist.sample(shape)
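# Usage sketch for `_adapted_sampling` (illustrative, not from the source):
# with same_on_batch=True every batch element receives the same draw, which
# is handy for batch-consistent augmentation parameters.
_dist = torch.distributions.Uniform(torch.tensor(0.0), torch.tensor(1.0))
_samples = _adapted_sampling((4, 3), _dist, same_on_batch=True)
assert torch.equal(_samples[0], _samples[1])  # all rows are identical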
def squash_action(
    dist: torch.distributions.Distribution, raw_action: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    squashed_action = torch.tanh(raw_action)
    # Log-determinant of the tanh squashing in a numerically stable form:
    # log(1 - tanh(u)^2) = 2 * (log 2 - u - softplus(-2u)).
    jacob = 2 * (math.log(2) - raw_action - F.softplus(-2 * raw_action))
    log_prob = (dist.log_prob(raw_action) - jacob).sum(dim=-1, keepdim=True)
    return squashed_action, log_prob
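# Usage sketch (illustrative, SAC-style tanh-squashed Gaussian policy):
# sample a raw action from a Normal, squash it into (-1, 1), and correct the
# log-probability for the change of variables.
_policy = torch.distributions.Normal(torch.zeros(2), torch.ones(2))
_raw = _policy.rsample()
_action, _log_prob = squash_action(_policy, _raw)  # _action lies in (-1, 1)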
def forward(
    self, input: torch.distributions.Distribution, target: torch.Tensor
) -> torch.Tensor:
    nll = -input.log_prob(target)
    if self.beta > 0.0:
        # Beta-NLL: rescale the loss by the (detached) predicted variance
        # raised to beta, trading off mean fitting against variance fitting.
        variance = input.variance
        nll = nll * (variance.detach() ** self.beta)
    return nll
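# Worked example of the beta-weighted NLL above (illustrative; the enclosing
# module's `self.beta` is mimicked here by a plain constant). With beta > 0
# the per-sample NLL is rescaled by variance**beta, damping the gradient
# dominance of low-variance predictions.
_pred = torch.distributions.Normal(torch.zeros(3), torch.ones(3))
_target = torch.tensor([0.5, -1.0, 2.0])
_beta = 0.5
_nll = -_pred.log_prob(_target) * _pred.variance.detach() ** _beta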
def expected_improvement(
    distribution: torch.distributions.Distribution,
    y_best: Union[torch.Tensor, float] = 0.0,
    n_samples: int = 64,
):
    # Monte Carlo estimate of the expected improvement over the incumbent y_best.
    improvement = torch.nn.functional.relu(distribution.sample((n_samples,)) - y_best)
    return improvement.mean(dim=0)
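# Usage sketch (illustrative): Monte Carlo expected improvement for a
# Gaussian predictive distribution at one candidate point. For a Normal this
# has a closed form, but the sampling estimator works for any distribution.
_posterior = torch.distributions.Normal(torch.tensor(0.2), torch.tensor(0.1))
_ei = expected_improvement(_posterior, y_best=0.3, n_samples=256)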
def classifier_nn(
    model: str,
    prior: torch.distributions.Distribution,
    context: torch.Tensor,
    hidden_features: int = 50,
) -> torch.nn.Module:
    """Neural classifier

    Args:
        model: Model, one of linear / mlp / resnet
        prior: Prior distribution
        context: Observation
        hidden_features: For all, number of hidden features

    Returns:
        Neural network
    """
    parameter_dim = prior.sample([1]).shape[1]
    context = utils.torchutils.atleast_2d(context)
    # Flattened dimensionality of a single observation, as a plain int so it
    # can be passed to layer constructors.
    observation_dim = int(torch.tensor(context.shape[1:]).prod())

    if model == "linear":
        neural_net = nn.Linear(parameter_dim + observation_dim, 1)
    elif model == "mlp":
        neural_net = nn.Sequential(
            nn.Linear(parameter_dim + observation_dim, hidden_features),
            nn.BatchNorm1d(hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.BatchNorm1d(hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, 1),
        )
    elif model == "resnet":
        neural_net = nets.ResidualNet(
            in_features=parameter_dim + observation_dim,
            out_features=1,
            hidden_features=hidden_features,
            context_features=None,
            num_blocks=2,
            activation=torch.relu,
            dropout_probability=0.0,
            use_batch_norm=False,
        )
    else:
        raise ValueError("'model' must be one of ['linear', 'mlp', 'resnet'].")

    return neural_net
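# Usage sketch (commented out because it assumes an sbi-style environment
# that provides `utils.torchutils` and `nets`, as imported by the function
# above):
# _prior = torch.distributions.MultivariateNormal(torch.zeros(3), torch.eye(3))
# _obs = torch.randn(1, 5)  # a single 5-dimensional observation
# _clf = classifier_nn("mlp", _prior, _obs, hidden_features=32)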
def logprob_from_distribution(
    self, policy: torch.distributions.Distribution, actions: torch.Tensor
) -> torch.Tensor:
    """Calculate the log-probability of an action under a policy.

    Args:
        policy (torch.distributions.Distribution): The policy distribution over the input state.
        actions (torch.Tensor): Actions to take the log-probability of.

    Returns:
        log_probs (torch.Tensor): Log-probabilities of the actions under the policy distribution.
    """
    return policy.log_prob(actions)
def forward(
    self,
    dist: torch.distributions.Distribution,
    action: torch.Tensor,
    advantage: torch.Tensor,
    old_logprob: torch.Tensor,
) -> torch.Tensor:
    """Get the clipped surrogate loss for a distribution `dist` generated by the policy network.

    Args:
        dist (torch.distributions.Distribution): action distribution of the policy head being optimized
        action (torch.Tensor): the actual action taken during collection in the environment
        advantage (torch.Tensor): the estimated advantage of taking `action`
        old_logprob (torch.Tensor): the original log probability of taking `action` under the old policy

    Returns:
        torch.Tensor
    """
    new_logprob = dist.log_prob(action)
    ratio = torch.exp(new_logprob - old_logprob)
    unclipped = ratio * advantage
    clipped = torch.clamp(ratio, 1.0 - self.clip, 1.0 + self.clip) * advantage
    # Pessimistic (minimum) objective, negated for gradient descent.
    err = -torch.min(clipped, unclipped).mean()
    if self.entropy_bonus:
        err = err - dist.entropy().mean() * self.entropy_bonus
    return err
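# Usage sketch (illustrative; `self.clip` and `self.entropy_bonus` belong to
# the enclosing module, and `ppo_loss` below is a hypothetical instance of
# it): the ratio exp(new_logprob - old_logprob) is clipped to
# [1 - clip, 1 + clip] and the pessimistic minimum is taken, which is PPO's
# clipped surrogate objective.
# _dist = torch.distributions.Normal(torch.zeros(8), torch.ones(8))
# _actions = _dist.sample()
# _old_logprob = _dist.log_prob(_actions).detach()
# _advantage = torch.randn(8)
# _err = ppo_loss(_dist, _actions, _advantage, _old_logprob)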
def forward(
    self, input: torch.distributions.Distribution, target: torch.Tensor
) -> torch.Tensor:
    # Negative log-likelihood of the target under the predicted distribution.
    return -input.log_prob(target)
def posterior_nn(
    model: str,
    prior: torch.distributions.Distribution,
    context: torch.Tensor,
    embedding: Optional[torch.nn.Module] = None,
    hidden_features: int = 50,
    mdn_num_components: int = 20,
    made_num_mixture_components: int = 10,
    made_num_blocks: int = 4,
    flow_num_transforms: int = 5,
) -> torch.nn.Module:
    """Neural posterior density estimator

    Args:
        model: Model, one of maf / mdn / made / nsf
        prior: Prior distribution
        context: Observation
        embedding: Embedding network
        hidden_features: For all, number of hidden features
        mdn_num_components: For MDNs only, number of components
        made_num_mixture_components: For MADEs only, number of mixture components
        made_num_blocks: For MADEs only, number of blocks
        flow_num_transforms: For flows only, number of transforms

    Returns:
        Neural network
    """
    # Affine transform that standardizes parameters using the prior moments.
    mean, std = prior.mean, prior.stddev
    standardizing_transform = transforms.AffineTransform(
        shift=-mean / std, scale=1 / std
    )

    parameter_dim = prior.sample([1]).shape[1]
    context = utils.torchutils.atleast_2d(context)
    # Flattened dimensionality of a single observation, as a plain int so it
    # can be passed to layer constructors.
    observation_dim = int(torch.tensor(context.shape[1:]).prod())

    if model == "mdn":
        neural_net = MultivariateGaussianMDN(
            features=parameter_dim,
            context_features=observation_dim,
            hidden_features=hidden_features,
            hidden_net=nn.Sequential(
                nn.Linear(observation_dim, hidden_features),
                nn.ReLU(),
                nn.Dropout(p=0.0),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
            ),
            num_components=mdn_num_components,
            custom_initialization=True,
        )
    elif model == "made":
        transform = standardizing_transform
        distribution = distributions_.MADEMoG(
            features=parameter_dim,
            hidden_features=hidden_features,
            context_features=observation_dim,
            num_blocks=made_num_blocks,
            num_mixture_components=made_num_mixture_components,
            use_residual_blocks=True,
            random_mask=False,
            activation=torch.relu,
            dropout_probability=0.0,
            use_batch_norm=False,
            custom_initialization=True,
        )
        neural_net = flows.Flow(transform, distribution, embedding)
    elif model == "maf":
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.MaskedAffineAutoregressiveTransform(
                            features=parameter_dim,
                            hidden_features=hidden_features,
                            context_features=observation_dim,
                            num_blocks=2,
                            use_residual_blocks=False,
                            random_mask=False,
                            activation=torch.tanh,
                            dropout_probability=0.0,
                            use_batch_norm=True,
                        ),
                        transforms.RandomPermutation(features=parameter_dim),
                    ]
                )
                for _ in range(flow_num_transforms)
            ]
        )
        transform = transforms.CompositeTransform([standardizing_transform, transform])
        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)
    elif model == "nsf":
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.PiecewiseRationalQuadraticCouplingTransform(
                            mask=create_alternating_binary_mask(
                                features=parameter_dim, even=(i % 2 == 0)
                            ),
                            transform_net_create_fn=lambda in_features, out_features: nets.ResidualNet(
                                in_features=in_features,
                                out_features=out_features,
                                hidden_features=hidden_features,
                                context_features=observation_dim,
                                num_blocks=2,
                                activation=torch.relu,
                                dropout_probability=0.0,
                                use_batch_norm=False,
                            ),
                            num_bins=10,
                            tails="linear",
                            tail_bound=3.0,
                            apply_unconditional_transform=False,
                        ),
                        transforms.LULinear(parameter_dim, identity_init=True),
                    ]
                )
                for i in range(flow_num_transforms)
            ]
        )
        transform = transforms.CompositeTransform([standardizing_transform, transform])
        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)
    else:
        raise ValueError("'model' must be one of ['maf', 'mdn', 'made', 'nsf'].")

    return neural_net
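# Usage sketch (commented out because it assumes an sbi/nflows-style
# environment providing `transforms`, `distributions_`, `flows`, `nets`, and
# `utils.torchutils`, as imported by the function above):
# _prior = torch.distributions.MultivariateNormal(torch.zeros(2), torch.eye(2))
# _obs = torch.randn(1, 4)
# _flow = posterior_nn("maf", _prior, _obs, flow_num_transforms=3)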
def select_action(dist: torch.distributions.Distribution) -> torch.Tensor:
    return dist.sample()
def upper_confidence_boundary(
    distribution: torch.distributions.Distribution,
    percentage: Union[torch.Tensor, float] = 0.95,
):
    # Upper endpoint of the central `percentage` interval, i.e. the
    # (1 + percentage) / 2 quantile.
    percentage = torch.tensor(percentage)
    return distribution.icdf(1 - (1 - percentage) / 2)
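# Worked example (illustrative): for a standard Normal and percentage=0.95
# the call evaluates icdf(0.975), the upper endpoint of the central 95%
# interval, i.e. mean + 1.96 * std.
_gauss = torch.distributions.Normal(torch.tensor(0.0), torch.tensor(1.0))
_ucb = upper_confidence_boundary(_gauss, 0.95)  # ~ 1.96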
def probability_of_improvement(
    distribution: torch.distributions.Distribution,
    y_best: Union[torch.Tensor, float] = 0.0,
):
    # P(Y > y_best) under the predictive distribution.
    return 1.0 - distribution.cdf(y_best)
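# Worked example (illustrative): for a standard Normal and y_best = 0,
# P(Y > y_best) = 1 - CDF(0) = 0.5.
_poi = probability_of_improvement(
    torch.distributions.Normal(torch.tensor(0.0), torch.tensor(1.0)),
    y_best=torch.tensor(0.0),
)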
def forward(
    self, input: torch.distributions.Distribution, target: torch.Tensor
) -> torch.Tensor:
    # Assumes the distribution object implements `crps`; the base
    # torch.distributions.Distribution class does not provide it.
    return input.crps(target)
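# For reference, a closed-form CRPS for a Normal distribution (an
# illustrative sketch, not from the source; `_normal_crps` is hypothetical):
# CRPS(N(mu, sigma^2), y) = sigma * (z * (2 * Phi(z) - 1) + 2 * phi(z) - 1 / sqrt(pi)),
# with z = (y - mu) / sigma.
def _normal_crps(dist: torch.distributions.Normal, target: torch.Tensor) -> torch.Tensor:
    z = (target - dist.loc) / dist.scale
    std_normal = torch.distributions.Normal(torch.zeros_like(z), torch.ones_like(z))
    pdf = std_normal.log_prob(z).exp()
    cdf = std_normal.cdf(z)
    return dist.scale * (z * (2 * cdf - 1) + 2 * pdf - 1 / math.sqrt(math.pi))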