def __call__(self, batch=None, temp=1.0):
    posterior = {}
    for node, obj in self.model_trace.iter_stochastic_nodes():
        par = obj['value']
        if node == 'h0-batch':
            # NB: 0.01 * torch.zeros(...) is still all zeros; the factor is
            # kept from the original but has no effect on the initialization.
            mean = pyro.param(
                f"{node}-mean",
                init_tensor=0.01 * torch.zeros(
                    (self.num_users, self.hidden_dim)))
            scale = pyro.param(
                f"{node}-scale",
                init_tensor=0.001 + 0.05 * 0.01 * torch.ones(
                    (self.num_users, self.hidden_dim)),
                constraint=constraints.interval(0, self.maxscale))
            with pyro.plate("data", size=self.num_users,
                            subsample=batch['userId']):
                posterior[node] = pyro.sample(
                    node,
                    dist.Normal(mean[batch['userId']],
                                temp * scale[batch['userId']]).to_event(1))
        else:
            mean = pyro.param(f"{node}-mean",
                              init_tensor=0.05 * par.detach().clone())
            scale = pyro.param(
                f"{node}-scale",
                init_tensor=0.05 + 0.01 * par.detach().clone().abs(),
                constraint=constraints.interval(0, self.maxscale))
            # .independent() is a deprecated alias of .to_event() in Pyro.
            posterior[node] = pyro.sample(
                node, dist.Normal(mean, temp * scale).to_event())
    return posterior
def guide(noise):
    """
    The guide serves as an approximation to the posterior p(z|x).
    It provides a valid joint probability density over all the latent
    random variables in the model.
    https://pyro.ai/examples/svi_part_i.html
    """
    # create params with constraints
    mu = {
        'N_X': pyro.param('N_X_mu', 0.5 * torch.ones(self.image_dim),
                          constraint=constraints.interval(0., 1.)),
        'N_Z': pyro.param('N_Z_mu', torch.zeros(self.z_dim),
                          constraint=constraints.interval(-3., 3.)),
    }
    sigma = {
        'N_X': pyro.param('N_X_sigma', 0.1 * torch.ones(self.image_dim),
                          constraint=constraints.interval(0.0001, 0.5)),
        'N_Z': pyro.param('N_Z_sigma', torch.ones(self.z_dim),
                          constraint=constraints.interval(0.0001, 3.)),
    }
    # The five label noise terms share the same initialization and constraints.
    for i in range(1, 6):
        mu[f'N_Y_{i}'] = pyro.param(
            f'N_Y_{i}_mu', 0.5 * torch.ones(self.label_dims[i]),
            constraint=constraints.interval(0., 1.))
        sigma[f'N_Y_{i}'] = pyro.param(
            f'N_Y_{i}_sigma', 0.1 * torch.ones(self.label_dims[i]),
            constraint=constraints.interval(0.0001, 0.5))
    for noise_term in noise.keys():
        pyro.sample(noise_term,
                    dist.Normal(mu[noise_term], sigma[noise_term]).to_event(1))
def to_constrained_interval(state_dict, lscale, amp):
    """
    Transforms kernel's unconstrained lengthscale and variance
    to their constrained domains (intervals).

    Args:
        state_dict: dict
            kernel's state dictionary; can be obtained from
            self.spgr.kernel.state_dict
        lscale: list
            list of two lists with lower and upper bound(s) for the
            lengthscale prior. The number of elements in each list is
            usually equal to the number of (independent) input dimensions
        amp: list
            list with two floats corresponding to lower and upper bounds
            for the variance (square of amplitude) prior

    Returns:
        Lengthscale and variance in the constrained domain (interval)
    """
    # NB: the key spelling below matches how the parameter was registered.
    l_ = state_dict()['lenghtscale_map_unconstrained']
    a_ = state_dict()['variance_map_unconstrained']
    l_interval = constraints.interval(torch.tensor(lscale[0]),
                                      torch.tensor(lscale[1]))
    a_interval = constraints.interval(torch.tensor(amp[0]),
                                      torch.tensor(amp[1]))
    l = transform_to(l_interval)(l_)
    a = transform_to(a_interval)(a_)
    return l, a
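# A minimal, self-contained sketch (not from the snippet above) of the
# unconstrained -> interval mapping that to_constrained_interval relies on;
# the bounds here are illustrative.
import torch
from torch.distributions import constraints, transform_to

interval = constraints.interval(torch.tensor(0.1), torch.tensor(2.0))
unconstrained = torch.tensor(-3.0)                   # any real number
constrained = transform_to(interval)(unconstrained)  # lands strictly in (0.1, 2.0)
assert 0.1 < constrained.item() < 2.0
roundtrip = transform_to(interval).inv(constrained)  # back to the real line
assert torch.allclose(roundtrip, unconstrained)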
def build_support(
        lower_bound: Optional[Tensor] = None,
        upper_bound: Optional[Tensor] = None) -> constraints.Constraint:
    """Return support for prior distribution, depending on available bounds.

    Args:
        lower_bound: lower bound of the prior support, can be None
        upper_bound: upper bound of the prior support, can be None

    Returns:
        support: PyTorch constraint object.
    """
    # Support is real if no bounds are passed.
    if lower_bound is None and upper_bound is None:
        support = constraints.real
        warnings.warn(
            """No prior bounds were passed, consider passing lower_bound
            and / or upper_bound if your prior has bounded support.""")
    # Only lower bound is specified.
    elif upper_bound is None:
        num_dimensions = lower_bound.numel()  # type: ignore
        if num_dimensions > 1:
            support = constraints._IndependentConstraint(
                constraints.greater_than(lower_bound),
                1,
            )
        else:
            support = constraints.greater_than(lower_bound)
    # Only upper bound is specified.
    elif lower_bound is None:
        num_dimensions = upper_bound.numel()
        if num_dimensions > 1:
            support = constraints._IndependentConstraint(
                constraints.less_than(upper_bound),
                1,
            )
        else:
            support = constraints.less_than(upper_bound)
    # Both are specified.
    else:
        num_dimensions = lower_bound.numel()
        assert (num_dimensions == upper_bound.numel()
                ), "There must be an equal number of independent bounds."
        if num_dimensions > 1:
            support = constraints._IndependentConstraint(
                constraints.interval(lower_bound, upper_bound),
                1,
            )
        else:
            support = constraints.interval(lower_bound, upper_bound)
    return support
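# Hypothetical usage of build_support above: with both bounds given and more
# than one dimension, the independent constraint reduces its check over the
# event dimension, so one out-of-range coordinate fails the whole vector.
import torch

support = build_support(lower_bound=torch.zeros(3), upper_bound=torch.ones(3))
print(support.check(torch.tensor([0.2, 0.5, 0.9])))  # tensor(True)
print(support.check(torch.tensor([0.2, 1.5, 0.9])))  # tensor(False)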
def model(x, verbose=False):
    # Parameters
    theta_D = pyro.param(
        'theta_D',
        torch.tensor(0.5),
        constraint=constraints.interval(0, 1),
    )
    theta_I = pyro.param(
        'theta_I',
        torch.tensor(0.5),
        constraint=constraints.interval(0, 1),
    )
    theta_S = pyro.param(
        'theta_S',
        torch.tensor([0.5, 0.5]),
        constraint=constraints.interval(0, 1),
    )
    theta_G = pyro.param(
        'theta_G',
        torch.ones(2, 2, 3).div(3),
        constraint=constraints.simplex,
    )
    theta_L = pyro.param(
        'theta_L',
        torch.tensor([0.5, 0.5, 0.5]),
        constraint=constraints.interval(0, 1),
    )
    # Forward
    with pyro.plate('data', x.shape[0]):
        d = pyro.sample('Difficulty', dist.Bernoulli(probs=theta_D),
                        obs=x.d).long()
        i = pyro.sample('Intelligence', dist.Bernoulli(probs=theta_I),
                        obs=x.i).long()
        s = pyro.sample('SAT', dist.Bernoulli(probs=theta_S[i]),
                        obs=x.s).long()
        # Grade is not observed, so it is enumerated in parallel.
        g = pyro.sample('Grade', dist.Categorical(probs=theta_G[i, d]),
                        infer={"enumerate": "parallel"}).long()
        l = pyro.sample('Letter', dist.Bernoulli(probs=theta_L[g]),
                        obs=x.l).long()
def guide(exogenous_dist_dict):
    mu_constraints = constraints.interval(-3., 3.)
    sigma_constraints = constraints.interval(.0001, 3)
    for exg_name, exg_dist in exogenous_dist_dict.items():
        # Alternative initialization from the exogenous distribution itself:
        # mu_guide = pyro.param("mu_{}".format(exg_name), torch.tensor(exg_dist.loc), constraint=mu_constraints)
        # sigma_guide = pyro.param("sigma_{}".format(exg_name), torch.tensor(exg_dist.scale), constraint=sigma_constraints)
        mu_guide = pyro.param("mu_{}".format(exg_name),
                              torch.tensor(0.0),
                              constraint=mu_constraints)
        sigma_guide = pyro.param("sigma_{}".format(exg_name),
                                 torch.tensor(1.0),
                                 constraint=sigma_constraints)
        # TODO: support binary parents.
        noise_dist = pyro.distributions.Normal
        pyro.sample(exg_name, noise_dist(mu_guide, sigma_guide))
def h0_amortization_func(self, batch_click, mask=None, V=None,
                         amort_scalefactor=None):
    if V is None:
        V = pyro.param("model.item_model.itemvec.weight-mean")
    if amort_scalefactor is None:
        amort_scalefactor = pyro.param(
            "h0-amort-scalefactor", torch.tensor(0.2),
            constraint=constraints.interval(0.001, 5.0))
    if mask is None:
        mask = torch.ones_like(batch_click)
    if self.freeze_item_parameters:
        V = V.detach()
        amort_scalefactor = amort_scalefactor.detach()

    click_item_ge3 = (torch.gt(batch_click, 3) * 1)
    batch_size, t_maxclick = batch_click.size()

    # Mean: a decaying weighted average of the clicked item vectors.
    weight = click_item_ge3 / (
        (click_item_ge3.cumsum(dim=1).float()**self.h0_amort_decayfactor)
        + 1e-5)
    weight = weight * mask  # Only use those that are in the training set
    weight = weight / (weight.sum(dim=1, keepdim=True) + 1e-5)
    click_vecs = V[batch_click]
    mean = (click_vecs * weight.unsqueeze(-1)).sum(1)

    # Scale: proportional to the spread of the clicked item vectors.
    weight = click_item_ge3 / (
        (click_item_ge3.sum(dim=1, keepdim=True).float()) + 1e-5)
    V_variation = ((click_vecs - click_vecs.mean(dim=1, keepdim=True))**2
                   ) * weight.unsqueeze(-1)
    scale = amort_scalefactor * (V_variation.sum(dim=1) + 1e-6).sqrt()
    return mean, scale
def guide(x_data, y_data):
    mean_mean = pyro.param("u_u", torch.ones(D, K) * 1.5)
    mean_scale = pyro.param("u_s", torch.ones(D, K),
                            constraint=constraints.positive)
    std_mean = pyro.param("s_u", torch.ones(D, K) * 0.6)
    std_scale = pyro.param("s_s", torch.ones(D, K) * 0.5,
                           constraint=constraints.positive)
    pk_concentration = pyro.param("pk", torch.ones(K,),
                                  constraint=constraints.interval(0.01, 1000))
    pk_prior = pyro.distributions.Dirichlet(pk_concentration)
    pk_sample = pyro.sample('pk_sample', pk_prior)
    with pyro.plate('k', size=K):
        with pyro.plate('d', size=D):
            mean_prior = pyro.distributions.Normal(loc=mean_mean,
                                                   scale=mean_scale)
            std_prior = pyro.distributions.Normal(loc=std_mean,
                                                  scale=std_scale)
            mean_sample = pyro.sample('mean_sample', mean_prior)
            std_sample = pyro.sample('std_sample', std_prior)
    return mean_sample, std_sample, pk_sample
def find_a_candidate(self, x_init):  # acquisition function optimizer
    """Given a starting point, `x_init`, takes one LBFGS step
    to minimize the differentiable acquisition function.

    :param torch.Tensor x_init: the initial point
    """
    # transform x to an unconstrained domain
    constraint = constraints.interval(self.constraints.lower_bound,
                                      self.constraints.upper_bound)
    unconstrained_x_init = transform_to(constraint).inv(x_init)
    unconstrained_x = unconstrained_x_init.clone().detach().requires_grad_(
        True)
    minimizer = optim.LBFGS([unconstrained_x])

    def closure():
        minimizer.zero_grad()
        x = transform_to(constraint)(unconstrained_x)
        y = self.lower_confidence_bound(x)
        autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
        return y

    minimizer.step(closure)
    # after finding a candidate in the unconstrained domain,
    # convert it back to the original domain.
    x = transform_to(constraint)(unconstrained_x)
    return x.detach()
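# Self-contained toy version (illustrative objective, not the acquisition
# function above) of the same constrained-optimization pattern used by the
# various find_a_candidate snippets here: optimize freely on the real line,
# then map back through the interval transform.
import torch
from torch import autograd, optim
from torch.distributions import constraints, transform_to

constraint = constraints.interval(0.0, 1.0)
x_init = torch.tensor([0.9])
unconstrained_x = transform_to(constraint).inv(x_init) \
    .clone().detach().requires_grad_(True)
minimizer = optim.LBFGS([unconstrained_x])

def closure():
    minimizer.zero_grad()
    x = transform_to(constraint)(unconstrained_x)
    y = (x - 0.3).pow(2).sum()  # toy objective, minimum at x = 0.3
    autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
    return y

minimizer.step(closure)
x_opt = transform_to(constraint)(unconstrained_x).detach()
assert 0.0 < x_opt.item() < 1.0  # the candidate always stays in bounds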
class TanhTransform(TransformModule):
    """
    Transform via tanh(), caching the pre-tanh value for cheap inversion.
    """
    domain = constraints.real
    codomain = constraints.interval(-1., 1.)
    bijective = True

    def __init__(self):
        super(TanhTransform, self).__init__()
        self._pretanh_value = None

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        self._pretanh_value = x
        finfo = torch.finfo(x.dtype)
        return torch.clamp(torch.tanh(x),
                           min=-1 + finfo.eps,
                           max=1. - finfo.eps)

    def _inverse(self, y):
        # Reuse the cached pre-tanh value when its shape is compatible;
        # otherwise fall back to an explicit (regularized) atanh.
        if self._pretanh_value is not None:
            try:
                return self._pretanh_value.view(y.shape)
            except RuntimeError:
                pass
        return 0.5 * torch.log((1 + y) / (1 - y + 1e-6) + 1e-6)

    def log_abs_det_jacobian(self, x, y):
        return torch.log(1. - torch.tanh(x) ** 2 + 1e-6)
class TanhTransform(Transform):
    r"""
    Transform via the mapping :math:`y = \tanh(x)`.
    """
    domain = constraints.real
    codomain = constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1

    @staticmethod
    def atanh(x):
        return 0.5 * (x.log1p() - (-x).log1p())

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        return x.tanh()

    def _inverse(self, y):
        return self.atanh(y)

    def log_abs_det_jacobian(self, x, y):
        # We use a formula that is more numerically stable; see details in
        # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
        return 2. * (np.log(2.) - x - F.softplus(-2. * x))
def guide(self, diurnality, viirs_observed, land_cover, latitude, longitude,
          meteorology, annealing_factor=1.0):
    T_max = viirs_observed.size(1)
    batch_size = diurnality.shape[0]
    pyro.module("vae", self)
    c_0_contig, h_0_contig = self.rnn_state_contig(batch_size)
    rnn_output = self.crnn(viirs_observed, h_0_contig, c_0_contig)
    z_prev = self.z_q_0.expand(batch_size, self.z_q_0.size(0))
    _constraint = constraints.interval(1, 100)
    alpha_q = pyro.param("alpha",
                         torch.tensor(10.0, device=diurnality.device),
                         constraint=_constraint)
    beta_q = pyro.param("beta",
                        torch.tensor(10.0, device=diurnality.device),
                        constraint=_constraint)
    pyro.sample("diurnal_ratio", dist.Beta(alpha_q, beta_q))
    with pyro.plate("data", batch_size):
        for t in pyro.markov(range(1, T_max + 1)):
            z_loc_q, z_scale_q = self.combiner(z_prev,
                                               rnn_output[:, t - 1, :],
                                               diurnality)
            z_t = self.sample_latent_space(annealing_factor, batch_size, t,
                                           z_loc_q, z_scale_q)
            z_prev = z_t
class TanhNormal(TransformedDistribution):
    """
    Transform a Gaussian using tanh to ensure samples are in (-1, 1).
    """
    arg_constraints = {'loc': constraints.real, 'scale': constraints.positive}
    support = constraints.interval(-1., 1.)
    has_rsample = True

    def __init__(self, loc, scale, validate_args=None):
        self.base_dist = Normal(loc, scale)
        self.loc = self.base_dist.loc
        self.scale = self.base_dist.scale
        self.trans = [TanhTransform()]
        super(TanhNormal, self).__init__(self.base_dist, self.trans,
                                         validate_args=validate_args)

    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(TanhNormal, _instance)
        batch_shape = torch.Size(batch_shape)
        new.loc = self.loc.expand(batch_shape)
        new.scale = self.scale.expand(batch_shape)
        new.base_dist = Normal(new.loc, new.scale)
        new.trans = self.trans
        super(TanhNormal, new).__init__(new.base_dist, new.trans,
                                        validate_args=False)
        new._validate_args = self._validate_args
        return new
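# Recent PyTorch ships an equivalent construction out of the box; a minimal
# sketch using the built-in TanhTransform rather than the custom class above:
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

d = TransformedDistribution(Normal(0.0, 1.0), [TanhTransform(cache_size=1)])
x = d.rsample((1000,))
assert ((x > -1.0) & (x < 1.0)).all()  # support is the open interval (-1, 1)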
def __init__(self, log_prob, grid, expand_shape=torch.Size([])):
    self._log_prob = log_prob
    self.device = 'cpu' if not grid.is_cuda else grid.get_device()
    self._grid = grid
    # all variables are independent (but may have different pdfs)
    self._event_shape = torch.Size([])
    # shape of log_prob (pdfs might differ)
    self._prob_shape = log_prob(grid[0]).shape
    self._expand_shape = expand_shape
    self._batch_shape = self._expand_shape + self._prob_shape
    N = grid.shape[0]  # Num grid points for tabulating log_prob

    # Expand grid to match (N,) + prob_shape
    if len(grid.shape) == 1:
        grid = grid.reshape((N,) + (1,) * len(self._prob_shape))
    grid = grid.expand((grid.shape[0],) + self._prob_shape)

    # TODO: tensor shapes correct?
    self._support = constraints.interval(grid[0], grid[-1])  # define finite support
    cdf, grid, norm = self._get_cdf(log_prob, grid)
    self.D = self._prob_shape.numel()  # Number of pdfs
    self.R = self.batch_shape.numel()  # Number of batch evaluations
    self.R_D = int(self.R / self.D)  # Batch evaluations with identical set of pdfs
    self.x = cdf.reshape(N, self.D).permute(1, 0)  # Prepare for interp1d
    self.y = grid.reshape(N, self.D).permute(1, 0)  # Prepare for interp1d
    self.log_scale = torch.log(norm)
    self.interp1d = Interp1d()
def __init__(self, a, b, eps=1e-8, validate_args=None):
    self.a, self.b = broadcast_all(a, b)
    if isinstance(a, Number) and isinstance(b, Number):
        batch_shape = torch.Size()
    else:
        batch_shape = self.a.size()
    super(TruncatedStandardNormal, self).__init__(
        batch_shape, validate_args=validate_args)
    if self.a.dtype != self.b.dtype:
        raise ValueError("Truncation bounds types are different")
    if any((self.a >= self.b).view(-1,).tolist()):
        raise ValueError("Incorrect truncation range")
    self._dtype_min_gt_0 = torch.tensor(torch.finfo(self.a.dtype).eps,
                                        dtype=self.a.dtype)
    self._dtype_max_lt_1 = torch.tensor(1 - torch.finfo(self.a.dtype).eps,
                                        dtype=self.a.dtype)
    self._little_phi_a = self._little_phi(self.a)
    self._little_phi_b = self._little_phi(self.b)
    self._big_phi_a = self._big_phi(self.a)
    self._big_phi_b = self._big_phi(self.b)
    self._Z = (self._big_phi_b - self._big_phi_a).clamp_min(eps)
    self._log_Z = self._Z.log()
    self._lpbb_m_lpaa_d_Z = (self._little_phi_b * self.b -
                             self._little_phi_a * self.a) / self._Z
    self._mean = -(self._little_phi_b - self._little_phi_a) / self._Z
    self._variance = (1 - self._lpbb_m_lpaa_d_Z -
                      ((self._little_phi_b - self._little_phi_a) / self._Z)**2)
    self._entropy = CONST_LOG_SQRT_2PI_E + self._log_Z \
        - 0.5 * self._lpbb_m_lpaa_d_Z
    self._support = constraints.interval(self.a, self.b)
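# Quick Monte Carlo sanity check (illustrative bounds) of the truncated
# standard normal mean used above: E[X | a < X < b] = -(phi(b) - phi(a)) / Z,
# where phi is the standard normal pdf and Z = Phi(b) - Phi(a).
import torch

torch.manual_seed(0)
a, b = -0.5, 2.0
z = torch.randn(200_000)
kept = z[(z > a) & (z < b)]  # rejection sampling from the truncated normal

std_normal = torch.distributions.Normal(0.0, 1.0)
phi = lambda v: torch.exp(std_normal.log_prob(torch.tensor(v)))
Z = std_normal.cdf(torch.tensor(b)) - std_normal.cdf(torch.tensor(a))
analytic_mean = -(phi(b) - phi(a)) / Z

assert torch.allclose(kept.mean(), analytic_mean, atol=0.01)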
class TanhARNormal(ARNormal):
    """
    Auto-regressive transformed Normal distribution with a final tanh
    transform.
    """
    arg_constraints = {'loc': constraints.real, 'scale': constraints.positive}
    support = constraints.interval(-1., 1.)
    has_rsample = True

    def __init__(self, loc, scale, transforms, validate_args=None):
        transforms.append(TanhTransform())
        super(TanhARNormal, self).__init__(loc, scale, transforms,
                                           validate_args)

    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(TanhARNormal, _instance)
        batch_shape = torch.Size(batch_shape)
        new.loc = self.loc.expand(batch_shape)
        new.scale = self.scale.expand(batch_shape)
        new.base_dist = Normal(new.loc, new.scale)
        new.trans = self.trans
        super(TanhARNormal, new).__init__(new.loc, new.scale, new.trans,
                                          validate_args=False)
        new._validate_args = self._validate_args
        return new
class TanhTransform(Transform):
    r"""
    Bijective transform via the mapping :math:`y = \text{tanh}(x)`.
    """
    domain = constraints.real
    codomain = constraints.interval(-1., 1.)
    bijective = True
    sign = +1

    @staticmethod
    def atanh(x):
        return 0.5 * (x.log1p() - (-x).log1p())

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        return torch.tanh(x)

    def _inverse(self, y):
        eps = torch.finfo(y.dtype).eps
        return self.atanh(y.clamp(min=-1. + eps, max=1. - eps))

    def log_abs_det_jacobian(self, x, y):
        return -2. * (x - math.log(2.) + F.softplus(-2. * x))
def main(num_vi_steps, num_bo_steps, seed):
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()

    est_ape = partial(estimated_ape, num_vi_steps=num_vi_steps)
    est_ape.__doc__ = "Estimated APE by VI"

    estimators = [true_ape, est_ape]
    noises = [0.0001, 0.25]
    num_acqs = [2, 10]

    for f, noise, num_acquisitions in zip(estimators, noises, num_acqs):
        X = torch.tensor([25., 75.])
        y = f(X)
        gpmodel = gp.models.GPRegression(
            X, y,
            gp.kernels.Matern52(input_dim=1, lengthscale=torch.tensor(10.)),
            noise=torch.tensor(noise), jitter=1e-6)
        gpbo = GPBayesOptimizer(constraints.interval(0, 100), gpmodel,
                                num_acquisitions=num_acquisitions)
        pyro.clear_param_store()
        for i in range(num_bo_steps):
            result = gpbo.get_step(f, None, verbose=True)
        print(f.__doc__)
        print(result)
def find_a_candidate(x_init, gpmodel, lower_bound=0, upper_bound=1,
                     sampling_type="MC", sample_size=20):
    # transform x to an unconstrained domain
    constraint = constraints.interval(lower_bound, upper_bound)
    unconstrained_x_init = transform_to(constraint).inv(x_init)
    # clone().detach() avoids the warning raised by torch.tensor(tensor, ...)
    unconstrained_x = unconstrained_x_init.clone().detach().requires_grad_(True)
    # LBFGS is an alternative optimizer here:
    # minimizer = optim.LBFGS([unconstrained_x])
    minimizer = optim.Adam([unconstrained_x], lr=0.001)

    def closure():
        minimizer.zero_grad()
        x = transform_to(constraint)(unconstrained_x)
        y = q_expected_improvement(x, gpmodel,
                                   sampling_type=sampling_type,
                                   sample_size=sample_size)
        autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
        return y

    minimizer.step(closure)
    # after finding a candidate in the unconstrained domain,
    # convert it back to the original domain.
    x = transform_to(constraint)(unconstrained_x)
    return x.detach()
class TanhTransform(Transform):
    r"""
    Transform via the mapping :math:`y = \tanh(x)`.

    It is equivalent to
    ```
    ComposeTransform([AffineTransform(0., 2.), SigmoidTransform(),
                      AffineTransform(-1., 2.)])
    ```
    However, that composition is not numerically stable, so it is
    recommended to use `TanhTransform` instead. Use `cache_size=1` to avoid
    `NaN`/`Inf` values when inverting near the boundary.
    """
    domain = constraints.real
    codomain = constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        return x.tanh()

    def _inverse(self, y):
        # We do not clamp to the boundary here as it may degrade the
        # performance of certain algorithms; use `cache_size=1` instead.
        return torch.atanh(y)

    def log_abs_det_jacobian(self, x, y):
        # We use a formula that is more numerically stable; see details in
        # https://github.com/tensorflow/probability/blob/master/tensorflow_probability/python/bijectors/tanh.py#L69-L80
        return 2. * (math.log(2.) - x - softplus(-2. * x))
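# Numeric check (float64 for precision) of the stable log-det formula above.
# It follows from 1 - tanh(x)^2 = 4 * exp(-2x) / (1 + exp(-2x))^2, so
# log(1 - tanh(x)^2) = 2 * (log 2 - x - softplus(-2x)).
import math
import torch
import torch.nn.functional as F

x = torch.linspace(-5.0, 5.0, 11, dtype=torch.float64)
naive = torch.log(1.0 - torch.tanh(x) ** 2)
stable = 2.0 * (math.log(2.0) - x - F.softplus(-2.0 * x))
assert torch.allclose(naive, stable)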
def find_a_candidate_ei(model, likelihood, x_init, lb, ub, previous_best,
                        device):
    # transform x to an unconstrained domain
    constraint = constraints.interval(lb, ub)
    unconstrained_x_init = transform_to(constraint).inv(x_init)
    unconstrained_x = unconstrained_x_init.clone().detach().requires_grad_(
        True)
    # WARNING: this is a memory intensive optimizer
    # TODO: Maybe try other gradient-based iterative methods
    minimizer = optim.LBFGS([unconstrained_x], max_iter=50)

    def closure():
        minimizer.zero_grad()
        x = transform_to(constraint)(unconstrained_x)
        y = log_expected_improvement(model, likelihood, x, previous_best,
                                     device)
        autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
        return y

    minimizer.step(closure)
    # after finding a candidate in the unconstrained domain,
    # convert it back to original domain.
    x = transform_to(constraint)(unconstrained_x)
    return x.detach()
def find_a_candidate(self, x_init):
    """
    Runs one start of a multistart optimization using L-BFGS in PyTorch.

    :param x_init: initial guess
    :type x_init: tensor
    :return: resulting optimum
    :rtype: tensor detached from gradient
    """
    # transform x to an unconstrained domain
    constraint = constraints.interval(
        torch.from_numpy(self.bounds[0]).type(torch.FloatTensor),
        torch.from_numpy(self.bounds[1]).type(torch.FloatTensor))
    unconstrained_x_init = transform_to(constraint).inv(x_init)
    unconstrained_x = unconstrained_x_init.clone().detach().requires_grad_(
        True)
    minimizer = optim.LBFGS([unconstrained_x],
                            line_search_fn='strong_wolfe')

    def closure():
        minimizer.zero_grad()
        x = transform_to(constraint)(unconstrained_x)
        y = self.acquisition_func(x)
        autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
        return y

    minimizer.step(closure)
    # after finding a candidate in the unconstrained domain,
    # convert it back to original domain.
    x = transform_to(constraint)(unconstrained_x)
    return x.detach()
def find_a_candidate(self, gpmodel, x_init, lower_bound=0, upper_bound=1):
    assert len(x_init.shape) == 1
    # map x_init into the unconstrained space via the inverse of the
    # interval transform
    constraint = constraints.interval(lower_bound, upper_bound)
    unconstrained_x_init = transform_to(constraint).inv(x_init)
    # reshape from (d,) to (1, d) for the optimizer
    unconstrained_x_init = unconstrained_x_init.view(-1, x_init.shape[0])
    unconstrained_x = unconstrained_x_init.clone().detach().requires_grad_(True)
    minimizer = optim.LBFGS([unconstrained_x])

    def closure():
        minimizer.zero_grad()
        x = transform_to(constraint)(unconstrained_x)
        # reshape from (1, d) back to (d,)
        x = x[0]
        y = self.lower_confidence_bound(x, gpmodel)
        autograd.backward(unconstrained_x, autograd.grad(y, unconstrained_x))
        return y

    minimizer.step(closure)
    # after finding a candidate in the unconstrained domain,
    # convert it back to the original domain.
    unconstrained_x = unconstrained_x[0]  # (1, d) -> (d,)
    x = transform_to(constraint)(unconstrained_x)
    return x.detach()
class RescaledBeta(TransformedDistribution):
    arg_constraints = {
        'concentration1': constraints.positive,
        'concentration0': constraints.positive
    }
    support = constraints.interval(-1., 1.)
    has_rsample = True

    def __init__(self, concentration1, concentration0, validate_args=None):
        base_distribution = Beta(concentration1, concentration0,
                                 validate_args=validate_args)
        # AffineTransform maps the Beta support (0, 1) onto (-1, 1).
        super(RescaledBeta, self).__init__(
            base_distribution=base_distribution,
            transforms=AffineTransform(loc=-1., scale=2.))

    def entropy(self):
        return self.base_dist.entropy() + math.log(2.)

    def sample(self, sample_shape=torch.Size()):
        # `eps` is assumed to be a module-level constant keeping samples
        # strictly inside the open interval.
        out = super(RescaledBeta, self).sample(sample_shape)
        return torch.clamp(out, -1. + eps, 1. - eps)

    def rsample(self, sample_shape=torch.Size()):
        out = super(RescaledBeta, self).rsample(sample_shape)
        return torch.clamp(out, -1. + eps, 1. - eps)
def __call__(self, batch=None, temp=1.0):
    posterior = {}
    for node, site in self.model_trace.iter_stochastic_nodes():
        par = self.prior_median[node]
        if node in ("user-init-plate", "data"):
            pass
        elif node == 'h0-batch':
            mean = pyro.param(
                "h0-mean",
                init_tensor=0.1 * torch.rand((self.num_users, self.init_dim)),
                constraint=constraints.interval(-1.0, 1.0))
            scale = pyro.param(
                "h0-scale",
                init_tensor=0.2 + 0.01 * 0.01 * torch.rand(
                    (self.num_users, self.init_dim)),
                constraint=constraints.interval(0, self.maxscale))
            if self.user_init is False:
                mean = torch.zeros_like(mean)
                scale = 0.001 * torch.ones_like(scale)
            with pyro.plate("user-init-plate", size=self.num_users,
                            subsample=batch['userId']):
                posterior[node] = pyro.sample(
                    node,
                    dist.Normal(mean[batch['userId']],
                                temp * scale[batch['userId']]).to_event(1))
        elif node == "user_model.gamma":
            mean = pyro.param(f"{node}-mean",
                              init_tensor=par.detach().clone(),
                              constraint=constraints.interval(0, 1.0))
            scale = pyro.param(
                f"{node}-scale",
                init_tensor=0.01 + 0.05 * par.detach().clone().abs(),
                constraint=constraints.interval(0, self.maxscale))
            # .independent() is a deprecated alias of .to_event() in Pyro.
            posterior[node] = pyro.sample(
                node, dist.Normal(mean, temp * scale).to_event())
        elif "item_model" in node:
            mean = pyro.param(f"{node}-mean",
                              init_tensor=par.detach().clone().clamp(-1.0, 1.0),
                              constraint=constraints.interval(-1, 1))
            scale = pyro.param(
                f"{node}-scale",
                init_tensor=0.01 + 0.05 * par.detach().clone().abs(),
                constraint=constraints.interval(0, self.maxscale))
            posterior[node] = pyro.sample(
                node,
                dist.Normal(mean, temp * scale).to_event()).clamp(-1.0, 1.0)
        else:
            mean = pyro.param(f"{node}-mean",
                              init_tensor=par.detach().clone(),
                              constraint=constraints.interval(-5.0, 5.0))
            scale = pyro.param(
                f"{node}-scale",
                init_tensor=0.01 + 0.05 * par.detach().clone().abs(),
                constraint=constraints.interval(0, self.maxscale))
            posterior[node] = pyro.sample(
                node, dist.Normal(mean, temp * scale).to_event())
    return posterior
def guide(noise):
    noise_terms = list(noise.keys())
    mu_constraints = constraints.interval(-3., 3.)
    sigma_constraints = constraints.interval(.0001, 3)
    mu = {
        k: pyro.param('{}_mu'.format(k),
                      tensor(0.),
                      constraint=mu_constraints) for k in noise_terms
    }
    sigma = {
        k: pyro.param('{}_sigma'.format(k),
                      tensor(1.),
                      constraint=sigma_constraints) for k in noise_terms
    }
    # The loop variable is renamed to avoid shadowing the `noise` argument.
    for noise_term in noise_terms:
        sample(noise_term, Normal(mu[noise_term], sigma[noise_term]))
def guide(noise):
    mu_constraints = constraints.interval(-3., 3.)
    sigma_constraints = constraints.interval(.0001, 3)
    mu = {
        k: pyro.param(
            f'{k}_mu',
            tensor(0.),
            constraint=mu_constraints,
        ) for k in noise
    }
    sigma = {
        k: pyro.param(
            f'{k}_sigma',
            tensor(1.),
            constraint=sigma_constraints,
        ) for k in noise
    }
    for k in noise:
        sample(k, Normal(mu[k], sigma[k]))
def __init__(self, test, kappa):
    self.seed = 1
    self.noise = 0.01
    self.num_acquisitions = 4
    self.test = test
    self.lower_bound = test.getMin()
    self.upper_bound = test.getMax()
    self.gpbo = GPBayesOptimizer(
        constraints.interval(self.lower_bound, self.upper_bound),
        self.getGPmodel(), self.num_acquisitions, test.getNoArgs(), kappa)
def __init__(self, ranges):
    """
    Constructor for a target function - processes ranges into constraints.

    ranges: range for each dimension
    """
    self.ranges = ranges
    self.constraints = []
    for range_el in ranges:
        self.constraints.append(
            constraints.interval(range_el[0], range_el[1]))
class StableTanh(td.Transform):
    r"""Invertible transformation (bijector) that computes
    :math:`Y = \tanh(X)`, therefore :math:`Y \in (-1, 1)`.

    This can be achieved by an affine transform of the Sigmoid
    transformation, i.e., it is equivalent to applying a list of
    transformations sequentially:

    .. code-block:: python

        transforms = [AffineTransform(loc=0, scale=2),
                      SigmoidTransform(),
                      AffineTransform(loc=-1, scale=2)]

    However, using the ``StableTanh`` transformation directly is more
    numerically stable.
    """
    domain = constraints.real
    codomain = constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1

    def __init__(self, cache_size=1):
        # We use caching by default as inversion is numerically unstable.
        super().__init__(cache_size=cache_size)

    def __eq__(self, other):
        return isinstance(other, StableTanh)

    def _call(self, x):
        return torch.tanh(x)

    def _inverse(self, y):
        # Based on https://github.com/tensorflow/agents/commit/dfb8c85a01d65832b05315928c010336df13f7b9#diff-a572e559b953f965c5c2cd1b9ded2c7b
        # 0.99999997 is the maximum value such that atanh(x) is valid for both
        # float32 and float64
        def _atanh(x):
            return 0.5 * torch.log((1 + x) / (1 - x))

        y = torch.where(
            torch.abs(y) <= 1.0, torch.clamp(y, -0.99999997, 0.99999997), y)
        return _atanh(y)

    def log_abs_det_jacobian(self, x, y):
        return 2.0 * (
            torch.log(torch.tensor(2.0, dtype=x.dtype, requires_grad=False))
            - x - nn.functional.softplus(-2.0 * x))
def support(self):
    return constraints.interval(self.low, self.high)
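# torch's own Uniform defines its support the same way (a constraint that
# depends on the instance's bounds); shown here purely as an illustration:
import torch
from torch.distributions import Uniform

d = Uniform(torch.tensor(0.0), torch.tensor(5.0))
print(d.support)  # an interval constraint with lower_bound=0.0, upper_bound=5.0
print(d.support.check(torch.tensor(3.0)))  # tensor(True)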