def __init__(self, dim, rank):
    self.dim = dim
    self.rank = rank
    self.loc1 = dist.Laplace(0, 1).sample((dim,))
    self.scale1 = dist.Exponential(1).sample((dim,))
    self.loc2 = dist.Laplace(0, 1).sample((rank,))
    self.scale2 = dist.Exponential(1).sample((rank,))
    self.mat = dist.Normal(0, 1).sample((dim, rank))
def forward(self, f_loc, f_var, y=None):
    y_var = f_var + self.scale
    y_dist = dist.Laplace(f_loc, y_var)
    if y is not None:
        y_dist = y_dist.expand_by(y.shape[:-f_loc.dim()]).to_event(y.dim())
    return pyro.sample("y", y_dist, obs=y)
def get_accel(self) -> float:
    accel = 0.0
    if 17 <= self.age < 21:
        accel = distrs.Laplace(0.25, 0.15)().item()  # mean = 0.25, sigma = 0.15
    elif 21 <= self.age < 26:
        accel = distrs.Normal(0.15, 0.15)().item()  # mean = 0.15, sigma = 0.15
    elif 26 <= self.age < 45:
        accel = distrs.Normal(0.0, 0.4)().item()  # mean = 0, sigma = 0.4
    elif 45 <= self.age < 65:
        accel = random_noise().item()
    else:
        condition = {0: "Normal", 1: "Fast"}[distrs.Bernoulli(0.3)().item()]  # success_prob = 0.3
        if condition == "Normal":
            accel = random_noise().item()
        else:
            accel = distrs.Uniform(0.2, 0.35)().item()  # lower_bound = 0.2, upper_bound = 0.35
    return accel
def do_bayesian_fitting(self, *args, fit_intercept=False, **kwargs) \
        -> Tuple[BayesianRegression, ndarray, SCFormFourierSeriesProcessor]:
    """
    Fit the Fourier series using Bayesian regression.

    :param fit_intercept: fitting an intercept is not allowed, since the intercept is
        just the amplitude of the base component; if the base component is missing it
        is added automatically
    :return: element 0 is the fitted BayesianRegression object,
             element 1 is the values predicted by the BayesianRegression object (i.e. the model output),
             element 2 is an SCFormFourierSeriesProcessor object built from the coef attribute
             of the BayesianRegression object
    """
    x_matrix = torch.tensor(self._form_x_matrix(), dtype=torch.float)
    y = torch.tensor(self.target.values.flatten(), dtype=torch.float)
    # use the lasso estimate as the initial value
    initial_guess = BayesianRegression.lasso_results(x_matrix, y, False)[0]
    # use a Laplace distribution as the prior
    # TODO add constraint
    weight_prior = dist.Laplace(
        torch.tensor(initial_guess, dtype=torch.float).reshape([1, x_matrix.shape[-1]]),
        3.).to_event(2)
    bayesian_regression = BayesianRegression(x_matrix.shape[-1], 1,
                                             fit_intercept=False,
                                             weight_prior=weight_prior)
    # mcmc
    mcmc_run_results = bayesian_regression.run_mcmc(x_matrix, y,
                                                    num_samples=300,
                                                    warmup_steps=100)
    # DEBUG
    tt = 1
def model(self, zero_data, covariates):
    with pyro.plate_stack("batch", zero_data.shape[:-2], rightmost_dim=-2):
        loc = zero_data[..., :1, :]
        scale = pyro.sample("scale", dist.LogNormal(loc, 1).to_event(1))
        with self.time_plate:
            jumps = pyro.sample("jumps", dist.Normal(0, scale).to_event(1))
        prediction = jumps.cumsum(-2)
        noise_dist = dist.Laplace(0, 1)
        self.predict(noise_dist, prediction)
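# The model above follows the pyro.contrib.forecast.ForecastingModel
# interface. A minimal fitting sketch, with `model_instance`, `data`, and
# `covariates` as hypothetical placeholders (tensors of shape
# (duration, obs_dim) and (duration, cov_dim)):
#
#     from pyro.contrib.forecast import Forecaster
#
#     forecaster = Forecaster(model_instance, data, covariates, num_steps=500)
#     samples = forecaster(data, covariates, num_samples=100)  # posterior predictive draws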
def model(self, data):
    output_size = self.encoder.insize
    decoder = pyro.module("decoder", self.decoder)
    # The decoder takes z (and theta) in the transformed coordinate frame
    # and outputs an upright image.
    with pyro.plate("data", data.shape[0]):  # pyro.plate needs a string name before the size
        # prior for z
        z = pyro.sample(
            "z",
            D.Normal(
                torch.zeros(decoder.z_dim, device=data.device),
                torch.ones(decoder.z_dim, device=data.device),
            ).to_event(1),
        )
        # Given z, the decoder produces an image in the self-consistent
        # (canonical) basis, which must then be transformed into the
        # real-world basis.
        view = decoder(z)
        # pyro.deterministic records a deterministic function of other
        # sample sites in the trace.
        pyro.deterministic("canonical_view", view)
        # The pose prior below is independent of the input. When the model
        # is replayed against the guide, these sites take the theta (and
        # mu, sigma) produced by the guide, and the inverse transform is
        # applied to obtain the output image.
        grid = coordinates.identity_grid([output_size, output_size], device=data.device)
        grid = grid.expand(data.shape[0], *grid.shape)
        transform = random_pose_transform(self.transforms)
        transform_grid = transform(grid)
        # The decoder output is resampled into the transformed coordinate system.
        transformed_view = T.broadcasting_grid_sample(view, transform_grid)
        pyro.sample("pixels", D.Laplace(transformed_view, 0.5).to_event(3), obs=data)
def get_jmIgnoreFoeProb(self) -> float:
    jmIgnoreFoeProb = 0.0
    if self.age < 23:
        jmIgnoreFoeProb = distrs.Uniform(-0.3, 0.3)().item()  # lower_bound = -0.3, upper_bound = 0.3
    elif 23 <= self.age < 40:
        jmIgnoreFoeProb = distrs.Laplace(0.12, 0.12)().item()  # mean = 0.12, sigma = 0.12
    else:
        jmIgnoreFoeProb = distrs.Normal(0.1, 0.06)().item()  # mean = 0.1, sigma = 0.06
    return jmIgnoreFoeProb
def get_minGapLat(self) -> float:
    minGapLat = 0.0
    if self.age < 30:
        minGapLat = distrs.Laplace(-0.2, 0.05)().item()  # mean = -0.2, sigma = 0.05
    elif 30 <= self.age < 50:
        minGapLat = distrs.Normal(0.0, 0.5)().item()  # mean = 0.0, sigma = 0.5
    else:
        minGapLat = distrs.Uniform(-0.2, 0.2)().item()  # lower_bound = -0.2, upper_bound = 0.2
    return minGapLat
def forward(self, x: T, labels: T) -> T:
    # A Laplace prior on the weights encourages sparsity (an L1-style penalty).
    weight = pyro.sample(
        f"{self._pyro_name}.weight",
        dist.Laplace(0.0, self.lam_p_scale).expand([self.n_input]).to_event(1),
    )
    bias = pyro.sample(
        f"{self._pyro_name}.bias",
        dist.Normal(0.0, self.bias_p_scale).expand([self.n_conditions]).to_event(1),
    )
    return self.logit_mean_sigmoid(x, weight, bias, labels)
def sample_guide(self):
    pyro.sample(
        f"{self._pyro_name}.weight",
        dist.Laplace(self.weight_loc, self.weight_scale).to_event(1),
    )
    bias = pyro.sample(
        f"{self._pyro_name}.bias",
        dist.Normal(self.bias_loc, self.bias_scale).to_event(1),
    )
    # Soft monotonicity constraint: pyro.factor adds a log-probability
    # penalty whenever consecutive biases decrease.
    pyro.factor(
        f"{self._pyro_name}.monotonic_bias",
        -self.alpha * torch.clamp(bias[:-1] - bias[1:], 0).sum(),
    )
def forward_model(
    data,
    transforms=None,
    instantiate_label=False,
    cond=True,
    decoder=None,
    output_size=128,
    device=torch.device("cpu"),
    kl_beta=1.0,
    **kwargs,
):
    decoder = pyro.module("view_decoder", decoder)
    N = data.shape[0]
    with poutine.scale_messenger.ScaleMessenger(1 / N):
        with pyro.plate("batch", N):
            with poutine.scale_messenger.ScaleMessenger(kl_beta):
                z = pyro.sample(
                    "z",
                    D.Normal(
                        torch.zeros(N, decoder.latent_dim, device=device),
                        torch.ones(N, decoder.latent_dim, device=device),
                    ).to_event(1),
                )

            # use supervision
            view = decoder(z)
            pyro.deterministic("canonical_view", view)
            grid = coordinates.identity_grid([output_size, output_size], device=device)
            grid = grid.expand(N, *grid.shape)
            scale = view.shape[-1] / output_size
            # Rescale the image coordinates so one pixel of the
            # reconstruction corresponds to one pixel of the view.
            grid = grid * (1 / scale)
            transform = random_pose_transform(transforms, device=device)
            transform_grid = transform(grid)
            transformed_view = T.broadcasting_grid_sample(view, transform_grid)
            obs = data if cond else None
            pyro.sample("pixels", D.Laplace(transformed_view, 0.5).to_event(3), obs=obs)
def model(self, x):
    # register PyTorch module `decoder` with Pyro
    pyro.module("decoder", self.decoder)
    with pyro.plate("data", x.shape[0]):
        # set up hyperparameters for the prior p(z)
        z_loc = x.new_zeros(torch.Size((x.shape[0], self.z_dim)))
        z_scale = x.new_ones(torch.Size((x.shape[0], self.z_dim)))
        # sample from the prior (the value will be sampled by the guide
        # when computing the ELBO)
        z = pyro.sample("latent", dist.Normal(z_loc, z_scale).to_event(1))
        # decode the latent code z
        loc_img = self.decoder.forward(z)
        # score against the actual images; channel, height and width are
        # dependent, so declare them as event dimensions with to_event(3)
        pyro.sample("obs", dist.Laplace(loc_img, 0.5).to_event(3), obs=x)
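# A minimal training sketch for a model/guide pair like the one above,
# using Pyro's SVI. This is a judgment-call example: `vae.guide` and
# `loader` are hypothetical and not part of the original snippet.
#
#     from pyro.infer import SVI, Trace_ELBO
#     from pyro.optim import Adam
#
#     svi = SVI(vae.model, vae.guide, Adam({"lr": 1e-3}), loss=Trace_ELBO())
#     for x in loader:
#         loss = svi.step(x)  # one gradient step on the negative ELBO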
def Laplace(_name, loc, scale):
    return {'x': pyro.sample(_name, dist.Laplace(loc, scale))}
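# A minimal usage sketch for the wrapper above (the site name "x_site" is
# hypothetical; assumes pyro and pyro.distributions as dist are imported):
#
#     draw = Laplace("x_site", 0.0, 1.0)['x']  # one Laplace(0, 1) sample as a tensor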
def __call__(self):
    response = self.response
    num_of_obs = self.num_of_obs
    extra_out = {}

    # smoothing params
    if self.lev_sm_input < 0:
        lev_sm = pyro.sample("lev_sm", dist.Uniform(0, 1))
    else:
        lev_sm = torch.tensor(self.lev_sm_input, dtype=torch.double)
        extra_out['lev_sm'] = lev_sm
    if self.slp_sm_input < 0:
        slp_sm = pyro.sample("slp_sm", dist.Uniform(0, 1))
    else:
        slp_sm = torch.tensor(self.slp_sm_input, dtype=torch.double)
        extra_out['slp_sm'] = slp_sm

    # residual tuning parameters
    nu = pyro.sample("nu", dist.Uniform(self.min_nu, self.max_nu))

    # prior for residuals
    obs_sigma = pyro.sample("obs_sigma", dist.HalfCauchy(self.cauchy_sd))

    # regression parameters
    if self.num_of_pr == 0:
        pr = torch.zeros(num_of_obs)
        pr_beta = pyro.deterministic("pr_beta", torch.zeros(0))
    else:
        with pyro.plate("pr", self.num_of_pr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                pr_sigma = self.pr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                pr_sigma = pyro.sample(
                    "pr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.pr_beta_prior, pr_sigma)))
            else:
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.pr_beta_prior, self.lasso_scale)))
        pr = pr_beta @ self.pr_mat.transpose(-1, -2)

    if self.num_of_nr == 0:
        nr = torch.zeros(num_of_obs)
        nr_beta = pyro.deterministic("nr_beta", torch.zeros(0))
    else:
        with pyro.plate("nr", self.num_of_nr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                nr_sigma = self.nr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                nr_sigma = pyro.sample(
                    "nr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.nr_beta_prior, nr_sigma)))
            else:
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.nr_beta_prior, self.lasso_scale)))
        nr = nr_beta @ self.nr_mat.transpose(-1, -2)

    if self.num_of_rr == 0:
        rr = torch.zeros(num_of_obs)
        rr_beta = pyro.deterministic("rr_beta", torch.zeros(0))
    else:
        with pyro.plate("rr", self.num_of_rr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                rr_sigma = self.rr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                rr_sigma = pyro.sample(
                    "rr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                rr_beta = pyro.sample(
                    "rr_beta", dist.Normal(self.rr_beta_prior, rr_sigma))
            else:
                rr_beta = pyro.sample(
                    "rr_beta", dist.Laplace(self.rr_beta_prior, self.lasso_scale))
        rr = rr_beta @ self.rr_mat.transpose(-1, -2)

    # a hack to make sure we don't use a dimension "1" due to rr_beta and pr_beta sampling
    r = pr + nr + rr
    if r.dim() > 1:
        r = r.unsqueeze(-2)

    # trend parameters
    # local trend proportion
    lt_coef = pyro.sample("lt_coef", dist.Uniform(0, 1))
    # global trend proportion
    gt_coef = pyro.sample("gt_coef", dist.Uniform(-0.5, 0.5))
    # global trend parameter
    gt_pow = pyro.sample("gt_pow", dist.Uniform(0, 1))

    # seasonal parameters
    if self.is_seasonal:
        # seasonality smoothing parameter
        if self.sea_sm_input < 0:
            sea_sm = pyro.sample("sea_sm", dist.Uniform(0, 1))
        else:
            sea_sm = torch.tensor(self.sea_sm_input, dtype=torch.double)
            extra_out['sea_sm'] = sea_sm

        # initial seasonality
        # a 33% lift is within 1 sd probability
        init_sea = pyro.sample(
            "init_sea",
            dist.Normal(0, 0.33).expand([self.seasonality]).to_event(1))
        init_sea = init_sea - init_sea.mean(-1, keepdim=True)

    b = [None] * num_of_obs  # slope
    l = [None] * num_of_obs  # level
    if self.is_seasonal:
        s = [None] * (self.num_of_obs + self.seasonality)
        for t in range(self.seasonality):
            s[t] = init_sea[..., t]
        s[self.seasonality] = init_sea[..., 0]
    else:
        s = [torch.tensor(0.)] * num_of_obs

    # states initial condition
    b[0] = torch.zeros_like(slp_sm)
    if self.is_seasonal:
        l[0] = response[0] - r[..., 0] - s[0]
    else:
        l[0] = response[0] - r[..., 0]

    # update process
    for t in range(1, num_of_obs):
        # this update equation uses l[t-1] ONLY;
        # intentionally different from the Holt-Winters form,
        # following Slawek's original SLGT model
        l[t] = lev_sm * (response[t] - s[t] - r[..., t]) + (1 - lev_sm) * l[t - 1]
        b[t] = slp_sm * (l[t] - l[t - 1]) + (1 - slp_sm) * b[t - 1]
        if self.is_seasonal:
            s[t + self.seasonality] = \
                sea_sm * (response[t] - l[t] - r[..., t]) + (1 - sea_sm) * s[t]

    # evaluation process
    # vectorize as much math as possible
    for lst in [b, l, s]:
        # torch.stack requires all items to have the same shape, but the
        # initial items of our lists may not have batch_shape, so we expand.
        lst[0] = lst[0].expand_as(lst[-1])
    b = torch.stack(b, dim=-1).reshape(b[0].shape[:-1] + (-1,))
    l = torch.stack(l, dim=-1).reshape(l[0].shape[:-1] + (-1,))
    s = torch.stack(s, dim=-1).reshape(s[0].shape[:-1] + (-1,))

    lgt_sum = l + gt_coef * l.abs() ** gt_pow + lt_coef * b
    lgt_sum = torch.cat([l[..., :1], lgt_sum[..., :-1]], dim=-1)  # shift by 1
    # a hack here as well to get rid of the extra "1" in r.shape
    if r.dim() >= 2:
        r = r.squeeze(-2)
    yhat = lgt_sum + s[..., :num_of_obs] + r

    with pyro.plate("response_plate", num_of_obs - 1):
        pyro.sample("response",
                    dist.StudentT(nu, yhat[..., 1:], obs_sigma),
                    obs=response[1:])

    # we care about beta, not pr_beta, nr_beta, ...
    extra_out['beta'] = torch.cat([pr_beta, nr_beta, rr_beta], dim=-1)

    extra_out.update({'b': b, 'l': l, 's': s, 'lgt_sum': lgt_sum})
    return extra_out
def likelihood(self, img):
    return dist.Laplace(img, torch.ones_like(img)).to_event(1)
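# Sketch of scoring an observation under this likelihood (`obs` is a
# hypothetical tensor with the same shape as `img`):
#
#     logp = self.likelihood(img).log_prob(obs)  # last dim is treated as one event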
def beta_l1_loss(self):
    # The negative log-density of Laplace(0, 1) at w is |w| + log(2), so
    # this is an L1 penalty on the decoder's beta weights, up to an
    # additive constant.
    return -dist.Laplace(0., 1.).log_prob(self.decoder.beta.weight).sum()
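# Sketch of use in a training objective (`recon_loss` is a hypothetical
# reconstruction term, not part of the original snippet):
#
#     loss = recon_loss + self.beta_l1_loss()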
def lap(mean: float, sigma: float) -> float:
    return distrs.Laplace(mean, sigma)().item()
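# Usage sketch: `distrs` behaves here like a wrapper whose distribution
# objects sample when called, so this returns a single float draw:
#
#     noise = lap(0.0, 0.1)  # one Laplace(loc=0.0, scale=0.1) sample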
def make_dist(loc, scale):
    return dist.Laplace(loc, scale)
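# Minimal sketch (assumes dist is pyro.distributions and torch is imported):
#
#     d = make_dist(torch.zeros(3), torch.ones(3))
#     x = d.sample()      # shape: (3,)
#     lp = d.log_prob(x)  # elementwise log-density, shape: (3,)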
    yhat = torch.stack(
        [torch.sigmoid((W[i] * x).sum(dim=1) + B[i]) for i in range(0, 3)],
        dim=1
    )
    with pyro.plate("data", n_observations):
        # sample class labels from a categorical over the three class scores
        y = pyro.sample("y", dist.Categorical(yhat), obs=y)


def log_reg_guide(x, y=None):
    n_observations, n_predictors = x.shape

    w_loc_zero = pyro.param("w_loc_zero", torch.rand(n_predictors))
    w_scale_zero = pyro.param("w_scale_zero", torch.rand(n_predictors),
                              constraint=constraints.positive)
    w_zero = pyro.sample("w_0", dist.Laplace(w_loc_zero, w_scale_zero))
    b_loc_zero = pyro.param("b_loc_zero", torch.rand(1))
    b_scale_zero = pyro.param("b_scale_zero", torch.rand(1),
                              constraint=constraints.positive)
    b_zero = pyro.sample("b_0", dist.Normal(b_loc_zero, b_scale_zero))

    w_loc_one = pyro.param("w_loc_one", torch.rand(n_predictors))
    w_scale_one = pyro.param("w_scale_one", torch.rand(n_predictors),
                             constraint=constraints.positive)
    w_one = pyro.sample("w_1", dist.Laplace(w_loc_one, w_scale_one))
    b_loc_one = pyro.param("b_loc_one", torch.rand(1))
    b_scale_one = pyro.param("b_scale_one", torch.rand(1),
                             constraint=constraints.positive)
    b_one = pyro.sample("b_1", dist.Normal(b_loc_one, b_scale_one))

    w_loc_two = pyro.param("w_loc_two", torch.rand(n_predictors))
    w_scale_two = pyro.param("w_scale_two", torch.rand(n_predictors),
                             constraint=constraints.positive)
    # The snippet is truncated here; following the pattern above, the
    # remaining sites would be w_2 (Laplace) and b_2 (Normal):
    w_two = pyro.sample("w_2", dist.Laplace(w_loc_two, w_scale_two))