def model(self, data): max_var, data1, data2 = data ### 1. prior over mean M M = pyro.sample( "M", dist.StudentT(1, 0, 3).expand_by([data1.size(0), data1.size(1)]).to_event(2)) ### 2. Prior over variances for the normal distribution U = pyro.sample( "U", dist.HalfNormal(1).expand_by([data1.size(0)]).to_event(1)) U = U.reshape(data1.size(0), 1).repeat(1, 3).view( -1) #Triplicate the rows for the subsequent mean calculation ## 3. prior over translations T_i: Sample translations for each of the x,y,z coordinates T2 = pyro.sample("T2", dist.Normal(0, 1).expand_by([3]).to_event(1)) ## 4. prior over rotations R_i ri_vec = pyro.sample("ri_vec", dist.Uniform(0, 1).expand_by( [3]).to_event(1)) # Uniform distribution R = self.sample_R(ri_vec) M_T1 = M M_R2_T2 = M @ R + T2 # 5. Likelihood with pyro.plate("plate_univariate", data1.size(0) * data1.size(1), dim=-1): pyro.sample("X1", dist.StudentT(1, M_T1.view(-1), U), obs=data1.view(-1)) pyro.sample("X2", dist.StudentT(1, M_R2_T2.view(-1), U), obs=data2.view(-1))
def model(home_id, away_id, score1_obs=None, score2_obs=None):
    """Hierarchical Poisson model for paired match scores.

    Each team gets latent attack/defence strengths; the expected goal
    counts combine a global intercept, a home advantage, and the
    attacking/defending teams' strengths.
    """

    def _folded_t_scale(site_name):
        # Half-StudentT prior for a positive scale, built by folding
        # a centered StudentT(3, 0, 2.5) about zero.
        return pyro.sample(
            site_name,
            dist.TransformedDistribution(
                dist.StudentT(3.0, 0.0, 2.5),
                FoldedTransform(),
            ),
        )

    # global priors
    alpha = pyro.sample("alpha", dist.Normal(0.0, 1.0))
    sd_att = _folded_t_scale("sd_att")
    sd_def = _folded_t_scale("sd_def")
    home = pyro.sample("home", dist.Normal(0.0, 1.0))  # home advantage

    num_teams = len(np.unique(home_id))

    # team-specific model parameters
    with pyro.plate("plate_teams", num_teams):
        attack = pyro.sample("attack", dist.Normal(0, sd_att))
        defend = pyro.sample("defend", dist.Normal(0, sd_def))

    # likelihood: log-rates, then Poisson observations per match
    home_log_rate = alpha + home + attack[home_id] - defend[away_id]
    away_log_rate = alpha + attack[away_id] - defend[home_id]
    theta1 = torch.exp(home_log_rate)
    theta2 = torch.exp(away_log_rate)
    with pyro.plate("data", len(home_id)):
        pyro.sample("s1", dist.Poisson(theta1), obs=score1_obs)
        pyro.sample("s2", dist.Poisson(theta2), obs=score2_obs)
def model(N=10):
    """Sample an N-point chain with bond-length and end-to-end restraints.

    The first three points (``M_first``) are fixed; the remaining N-3 are
    sampled. Consecutive points are softly restrained to 3.8 Å apart and
    the first/last points to 10 Å apart.
    """
    # Sample N-3 random points according to a Normal distribution
    # The plates render all the coordinates independent
    plate1 = pyro.plate("aa", N - 3, dim=-2)
    plate2 = pyro.plate("coord", 3, dim=-1)
    with plate1, plate2:
        M_last = pyro.sample("M", dist.Normal(0, 20))
    # NOTE(review): M_first (the three fixed points) must come from an
    # enclosing scope — not visible here; verify it is defined.
    # Stack fixed and moving coordinates
    M = torch.cat((M_first, M_last))
    # Make sure bond distances are around 3.8 Å
    # Standard deviation of bonds
    #sb=pyro.sample("sigma_bond", dist.HalfCauchy(scale=0.1))
    # Calculate bond distances (skip first two bonds, as they are fixed).
    # BUG FIX: torch.dist(a, b) collapses everything into ONE scalar norm;
    # per-bond distances require a norm along the coordinate axis.
    bonds = (M[3:] - M[2:-1]).norm(dim=-1)
    # BUG FIX: the plate was named "bonds", colliding with the sample site
    # of the same name (Pyro site names must be unique), and had no size.
    with pyro.plate("bond_plate", N - 3):
        bond_obs = pyro.sample("bonds",
                               dist.StudentT(1, bonds, 0.001),
                               obs=torch.tensor(3.8))
    # Add a distance restraint between first and last point
    # Standard deviation of pairwise distance
    sd = pyro.sample("sigma_dist", dist.HalfCauchy(scale=0.1))
    # Here a single scalar distance is exactly what torch.dist provides.
    d = torch.dist(M[0], M[-1])
    d_obs = pyro.sample("d_obs", dist.StudentT(1, d, 0.001),
                        obs=torch.tensor(10))
def model():
    """Sample site "x" from Normal or StudentT, batched over plates.

    ``shape``, ``loc``, ``scale`` and ``dist_type`` are presumably closure
    variables from the enclosing test harness — TODO confirm.
    """
    with pyro.plate_stack("plates", shape):
        with pyro.plate("particles", 200000):
            # BUG FIX: the original tested `"dist_type" == "Normal"`, a
            # comparison of two string LITERALS that is always False, so
            # the Normal branch was unreachable. Compare the variable.
            if dist_type == "Normal":
                pyro.sample("x", dist.Normal(loc, scale))
            else:
                pyro.sample("x", dist.StudentT(10.0, loc, scale))
def model(self, zero_data, covariates): assert zero_data.size(-1) == 1 # univariate duration = zero_data.size(-2) time, feature = covariates[..., 0], covariates[..., 1:] bias = pyro.sample("bias", dist.Normal(0, 10)) # construct a linear trend; we know that the sales are increasing # through years, so a positive-support prior should be used here trend_coef = pyro.sample("trend", dist.LogNormal(-2, 1)) trend = trend_coef * time # set prior of weights of the remaining covariates weight = pyro.sample( "weight", dist.Normal(0, 1).expand([feature.size(-1)]).to_event(1)) regressor = (weight * feature).sum(-1) # encode the additive weekly seasonality with pyro.plate("day_of_week", 7, dim=-1): seasonal = pyro.sample("seasonal", dist.Normal(0, 5)) seasonal = periodic_repeat(seasonal, duration, dim=-1) # make prediction prediction = bias + trend + seasonal + regressor # because Pyro forecasting framework is multivariate, # for univariate timeseries we need to make sure that # the last dimension is 1 prediction = prediction.unsqueeze(-1) # Now, we will use heavy tail noise because the data has some outliers # (such as Christmas day) dof = pyro.sample("dof", dist.Uniform(1, 10)) noise_scale = pyro.sample("noise_scale", dist.LogNormal(-2, 1)) noise_dist = dist.StudentT(dof.unsqueeze(-1), 0, noise_scale.unsqueeze(-1)) self.predict(noise_dist, prediction)
def model():
    """Sample site "x" from one of three distributions chosen by type.

    ``shape``, ``loc``, ``scale`` and ``dist_type`` are presumably closure
    variables from the enclosing test harness — TODO confirm.
    """
    with pyro.plate_stack("plates", shape):
        # BUG FIX: the original compared the string LITERAL "dist_type"
        # against "Normal"/"StudentT" — always False, so only the
        # AsymmetricLaplace branch could ever run. Compare the variable.
        if dist_type == "Normal":
            return pyro.sample("x", dist.Normal(loc, scale))
        elif dist_type == "StudentT":
            return pyro.sample("x", dist.StudentT(10.0, loc, scale))
        else:
            return pyro.sample("x", dist.AsymmetricLaplace(loc, scale, 1.5))
def model(N=None, mu_loc=None, mu_scale=None, s=None, tau_df=None,
          tau_scale=None, y=None):
    """Non-centered hierarchical normal model (eight-schools style).

    Latents are declared improper, then tied to their actual priors via
    observed sample statements (a Stan-to-Pyro compilation pattern).
    """
    theta_raw = sample('theta_raw', ImproperUniform(shape=N))
    mu = sample('mu', ImproperUniform())
    tau = sample('tau', ImproperUniform())
    # FIX: removed the dead `theta = zeros(N)` — it was unconditionally
    # overwritten by the non-centered reparameterization below.
    theta = tau * theta_raw + mu
    sample('mu' + '__1', dist.Normal(mu_loc, mu_scale), obs=mu)
    sample('tau' + '__2', dist.StudentT(tau_df, 0.0, tau_scale), obs=tau)
    sample('theta_raw' + '__3', dist.Normal(zeros(N), 1.0), obs=theta_raw)
    sample('y' + '__4', dist.Normal(theta, s), obs=y)
def model(self, zero_data, covariates):
    """Hierarchical forecaster over (store, dept) with weekly seasonality.

    ``zero_data`` has shape (num_stores, num_depts, duration, 1);
    ``covariates`` packs time in column 0 and features after it. Each
    (store, dept) pair gets its own bias, trend, regression weights,
    seasonality and noise scale via nested plates.
    """
    num_stores, num_depts, duration, one = zero_data.shape
    time, feature = covariates[..., :1], covariates[..., 1:]
    store_plate = pyro.plate("store", num_stores, dim=-3)
    dept_plate = pyro.plate("dept", num_depts, dim=-2)
    day_of_week_plate = pyro.plate("day_of_week", 7, dim=-1)
    with dept_plate, store_plate:
        bias = pyro.sample("bias", dist.Normal(0, 10).expand([1]).to_event(1))
        trend_coef = pyro.sample(
            "trend", dist.LogNormal(-1, 1).expand([1]).to_event(1))
        trend = trend_coef * time
        # set prior of weights of the remaining covariates
        weight = pyro.sample(
            "weight",
            dist.Normal(0, 1).expand([1, feature.size(-1)]).to_event(2))
        regressor = weight.matmul(feature.unsqueeze(-1)).squeeze(-1)
        # encode weekly seasonality: 7 values tiled along the time axis
        with day_of_week_plate:
            seasonal = pyro.sample(
                "seasonal", dist.Normal(0, 1).expand([1]).to_event(1))
        seasonal = periodic_repeat(seasonal, duration, dim=-2)
        noise_scale = pyro.sample(
            "noise_scale", dist.LogNormal(-1, 1).expand([1]).to_event(1))
        prediction = bias + trend + seasonal + regressor
        # heavy-tailed observation noise with learned degrees of freedom
        dof = pyro.sample("dof", dist.Uniform(1, 10).expand([1]).to_event(1))
        noise_dist = dist.StudentT(dof, zero_data, noise_scale)
        self.predict(noise_dist, prediction)
def make_dist(df, loc, scale):
    """Construct a Student's t distribution from its three parameters."""
    return dist.StudentT(df=df, loc=loc, scale=scale)
def random_studentt(shape):
    """Return a StudentT with randomly drawn parameters of ``shape``.

    df and scale are log-uniform-ish positives (exp of uniforms); loc is
    standard normal.
    """
    degrees = torch.rand(shape).exp()
    center = torch.randn(shape)
    spread = torch.rand(shape).exp()
    return dist.StudentT(degrees, center, spread)
def StudentT(_name, df, loc, scale):
    """Sample a StudentT at site ``_name`` and return it keyed as 'x'."""
    draw = pyro.sample(_name, dist.StudentT(df, loc, scale))
    return {'x': draw}
def __call__(self):
    """LGT (local/global trend) state-space model with regression.

    Samples smoothing, trend, seasonality and regression parameters,
    runs the exponential-smoothing recursion over the observed series,
    and scores the one-step-ahead predictions with StudentT noise.
    Returns ``extra_out`` with fitted states and coefficients.
    """
    response = self.response
    num_of_obs = self.num_of_obs
    extra_out = {}
    # smoothing params: sample when the input is negative (sentinel for
    # "learn it"), otherwise treat the supplied value as fixed.
    if self.lev_sm_input < 0:
        lev_sm = pyro.sample("lev_sm", dist.Uniform(0, 1))
    else:
        lev_sm = torch.tensor(self.lev_sm_input, dtype=torch.double)
        extra_out['lev_sm'] = lev_sm
    if self.slp_sm_input < 0:
        slp_sm = pyro.sample("slp_sm", dist.Uniform(0, 1))
    else:
        slp_sm = torch.tensor(self.slp_sm_input, dtype=torch.double)
        extra_out['slp_sm'] = slp_sm
    # residual tuning parameters
    nu = pyro.sample("nu", dist.Uniform(self.min_nu, self.max_nu))
    # prior for residuals
    obs_sigma = pyro.sample("obs_sigma", dist.HalfCauchy(self.cauchy_sd))
    # regression parameters (positive regressors)
    if self.num_of_pr == 0:
        pr = torch.zeros(num_of_obs)
        pr_beta = pyro.deterministic("pr_beta", torch.zeros(0))
    else:
        with pyro.plate("pr", self.num_of_pr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                pr_sigma = self.pr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                pr_sigma = pyro.sample(
                    "pr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas (folded => positivity constraint)
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.pr_beta_prior, pr_sigma)))
            else:
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.pr_beta_prior, self.lasso_scale)))
        pr = pr_beta @ self.pr_mat.transpose(-1, -2)
    # negative regressors (same penalty structure as above)
    if self.num_of_nr == 0:
        nr = torch.zeros(num_of_obs)
        nr_beta = pyro.deterministic("nr_beta", torch.zeros(0))
    else:
        with pyro.plate("nr", self.num_of_nr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                nr_sigma = self.nr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                nr_sigma = pyro.sample(
                    "nr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.nr_beta_prior, nr_sigma)))
            else:
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.nr_beta_prior, self.lasso_scale)))
        nr = nr_beta @ self.nr_mat.transpose(-1, -2)
    # regular (sign-unconstrained) regressors
    if self.num_of_rr == 0:
        rr = torch.zeros(num_of_obs)
        rr_beta = pyro.deterministic("rr_beta", torch.zeros(0))
    else:
        with pyro.plate("rr", self.num_of_rr):
            # fixed scale ridge
            if self.reg_penalty_type == 0:
                rr_sigma = self.rr_sigma_prior
            # auto scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                rr_sigma = pyro.sample(
                    "rr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                rr_beta = pyro.sample(
                    "rr_beta", dist.Normal(self.rr_beta_prior, rr_sigma))
            else:
                rr_beta = pyro.sample(
                    "rr_beta", dist.Laplace(self.rr_beta_prior,
                                            self.lasso_scale))
        rr = rr_beta @ self.rr_mat.transpose(-1, -2)
    # a hack to make sure we don't use a dimension "1" due to rr_beta and pr_beta sampling
    r = pr + nr + rr
    if r.dim() > 1:
        r = r.unsqueeze(-2)
    # trend parameters
    # local trend proportion
    lt_coef = pyro.sample("lt_coef", dist.Uniform(0, 1))
    # global trend proportion
    gt_coef = pyro.sample("gt_coef", dist.Uniform(-0.5, 0.5))
    # global trend parameter
    gt_pow = pyro.sample("gt_pow", dist.Uniform(0, 1))
    # seasonal parameters
    if self.is_seasonal:
        # seasonality smoothing parameter
        if self.sea_sm_input < 0:
            sea_sm = pyro.sample("sea_sm", dist.Uniform(0, 1))
        else:
            sea_sm = torch.tensor(self.sea_sm_input, dtype=torch.double)
            extra_out['sea_sm'] = sea_sm
        # initial seasonality
        # 33% lift is with 1 sd prob.
        init_sea = pyro.sample(
            "init_sea",
            dist.Normal(0, 0.33).expand([self.seasonality]).to_event(1))
        # center so the seasonal factors sum to ~0
        init_sea = init_sea - init_sea.mean(-1, keepdim=True)
    b = [None] * num_of_obs  # slope
    l = [None] * num_of_obs  # level
    if self.is_seasonal:
        s = [None] * (self.num_of_obs + self.seasonality)
        for t in range(self.seasonality):
            s[t] = init_sea[..., t]
        s[self.seasonality] = init_sea[..., 0]
    else:
        s = [torch.tensor(0.)] * num_of_obs
    # states initial condition
    b[0] = torch.zeros_like(slp_sm)
    if self.is_seasonal:
        l[0] = response[0] - r[..., 0] - s[0]
    else:
        l[0] = response[0] - r[..., 0]
    # update process
    for t in range(1, num_of_obs):
        # this update equation with l[t-1] ONLY.
        # intentionally different from the Holt-Winter form
        # this change is suggested from Slawek's original SLGT model
        l[t] = lev_sm * (response[t] - s[t] - r[..., t]) + (1 - lev_sm) * l[t - 1]
        b[t] = slp_sm * (l[t] - l[t - 1]) + (1 - slp_sm) * b[t - 1]
        if self.is_seasonal:
            s[t + self.seasonality] = \
                sea_sm * (response[t] - l[t] - r[..., t]) + (1 - sea_sm) * s[t]
    # evaluation process
    # vectorize as much math as possible
    for lst in [b, l, s]:
        # torch.stack requires all items to have the same shape, but the
        # initial items of our lists may not have batch_shape, so we expand.
        lst[0] = lst[0].expand_as(lst[-1])
    b = torch.stack(b, dim=-1).reshape(b[0].shape[:-1] + (-1, ))
    l = torch.stack(l, dim=-1).reshape(l[0].shape[:-1] + (-1, ))
    s = torch.stack(s, dim=-1).reshape(s[0].shape[:-1] + (-1, ))
    # combined level + damped global trend + local trend term
    lgt_sum = l + gt_coef * l.abs()**gt_pow + lt_coef * b
    lgt_sum = torch.cat([l[..., :1], lgt_sum[..., :-1]], dim=-1)  # shift by 1
    # a hack here as well to get rid of the extra "1" in r.shape
    if r.dim() >= 2:
        r = r.squeeze(-2)
    yhat = lgt_sum + s[..., :num_of_obs] + r
    # likelihood over t = 1..T-1 (t = 0 has no one-step-ahead prediction)
    with pyro.plate("response_plate", num_of_obs - 1):
        pyro.sample("response",
                    dist.StudentT(nu, yhat[..., 1:], obs_sigma),
                    obs=response[1:])
    # we care beta not the pr_beta, nr_beta, ...
    extra_out['beta'] = torch.cat([pr_beta, nr_beta, rr_beta], dim=-1)
    extra_out.update({'b': b, 'l': l, 's': s, 'lgt_sum': lgt_sum})
    return extra_out
def __call__(self):
    """KTR-style kernel-knot regression model with level and regressors.

    Notes
    -----
    Labeling system:
    1. for kernel level of parameters such as rho, span, nkots, kerenel etc.,
       use suffix _lev and _coef for levels and regression to partition
    2. for knots level of parameters such as coef, loc and scale priors,
       use prefix _lev and _rr _pr for levels, regular and positive
       regressors to partition
    3. reduce ambigious by replacing all greeks by labels more intuitive
       use _coef, _weight etc. instead of _beta, use _scale instead of _sigma
    """
    response = self.response
    which_valid = self.which_valid_res
    n_obs = self.n_obs
    # n_valid = self.n_valid_res
    sdy = self.sdy
    meany = self.mean_y
    dof = self.dof
    lev_knot_loc = self.lev_knot_loc
    seas_term = self.seas_term
    pr = self.pr
    rr = self.rr
    n_pr = self.n_pr
    n_rr = self.n_rr
    k_lev = self.k_lev
    k_coef = self.k_coef
    n_knots_lev = self.n_knots_lev
    n_knots_coef = self.n_knots_coef
    lev_knot_scale = self.lev_knot_scale
    # mult var norm stuff
    mvn = self.mvn
    geometric_walk = self.geometric_walk
    min_residuals_sd = self.min_residuals_sd
    # clamp min_residuals_sd into [0, 1]
    if min_residuals_sd > 1.0:
        min_residuals_sd = torch.tensor(1.0)
    if min_residuals_sd < 0:
        min_residuals_sd = torch.tensor(0.0)
    # expand dim to n_rr x n_knots_coef
    rr_init_knot_loc = self.rr_init_knot_loc
    rr_init_knot_scale = self.rr_init_knot_scale
    rr_knot_scale = self.rr_knot_scale
    # this does not need to expand dim since it is used as latent grand mean
    pr_init_knot_loc = self.pr_init_knot_loc
    pr_init_knot_scale = self.pr_init_knot_scale
    pr_knot_scale = self.pr_knot_scale
    # transformation of data
    regressors = torch.zeros(n_obs)
    if n_pr > 0 and n_rr > 0:
        regressors = torch.cat([rr, pr], dim=-1)
    elif n_pr > 0:
        regressors = pr
    elif n_rr > 0:
        regressors = rr
    # de-mean and de-seasonalize the response before modeling
    response_tran = response - meany - seas_term
    # sampling begins here
    extra_out = {}
    # levels sampling: knot values mapped to observations via kernel k_lev
    lev_knot_tran = pyro.sample(
        "lev_knot_tran",
        dist.Normal(lev_knot_loc - meany,
                    lev_knot_scale).expand([n_knots_lev]).to_event(1))
    lev = (lev_knot_tran @ k_lev.transpose(-2, -1))
    # using hierarchical priors vs. multivariate priors
    if mvn == 0:
        # regular regressor sampling
        if n_rr > 0:
            # pooling latent variables
            rr_init_knot = pyro.sample(
                "rr_init_knot",
                dist.Normal(rr_init_knot_loc, rr_init_knot_scale).to_event(1))
            rr_knot = pyro.sample(
                "rr_knot",
                dist.Normal(
                    rr_init_knot.unsqueeze(-1) *
                    torch.ones(n_rr, n_knots_coef),
                    rr_knot_scale).to_event(2))
            rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose(-2, -1)
        # positive regressor sampling
        if n_pr > 0:
            if geometric_walk:
                # TODO: development method
                pr_init_knot = pyro.sample(
                    "pr_init_knot",
                    dist.FoldedDistribution(
                        dist.Normal(pr_init_knot_loc,
                                    pr_init_knot_scale)).to_event(1))
                pr_knot_step = pyro.sample(
                    "pr_knot_step",
                    # note that unlike rr_knot, the first one is ignored as we use the initial scale
                    # to sample the first knot
                    dist.Normal(torch.zeros(n_pr, n_knots_coef),
                                pr_knot_scale).to_event(2))
                pr_knot = pr_init_knot.unsqueeze(-1) * pr_knot_step.cumsum(
                    -1).exp()
                pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(
                    -2, -1)
            else:
                # TODO: original method
                # pooling latent variables
                pr_init_knot = pyro.sample(
                    "pr_knot_loc",
                    dist.FoldedDistribution(
                        dist.Normal(pr_init_knot_loc,
                                    pr_init_knot_scale)).to_event(1))
                pr_knot = pyro.sample(
                    "pr_knot",
                    dist.FoldedDistribution(
                        dist.Normal(
                            pr_init_knot.unsqueeze(-1) *
                            torch.ones(n_pr, n_knots_coef),
                            pr_knot_scale)).to_event(2))
                pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(
                    -2, -1)
    else:
        # regular regressor sampling (multivariate-normal variant)
        if n_rr > 0:
            rr_init_knot = pyro.deterministic(
                "rr_init_knot", torch.zeros(rr_init_knot_loc.shape))
            # updated mod
            loc_temp = rr_init_knot_loc.unsqueeze(-1) * torch.ones(
                n_rr, n_knots_coef)
            scale_temp = torch.diag_embed(
                rr_init_knot_scale.unsqueeze(-1) *
                torch.ones(n_rr, n_knots_coef))
            # the sampling
            rr_knot = pyro.sample(
                "rr_knot",
                dist.MultivariateNormal(
                    loc=loc_temp, covariance_matrix=scale_temp).to_event(1))
            rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose(-2, -1)
        # positive regressor sampling
        if n_pr > 0:
            # this part is junk just so that the pr_init_knot has a prior;
            # but it does not connect to anything else
            # pooling latent variables
            pr_init_knot = pyro.sample(
                "pr_init_knot",
                dist.FoldedDistribution(
                    dist.Normal(pr_init_knot_loc,
                                pr_init_knot_scale)).to_event(1))
            # updated mod
            loc_temp = pr_init_knot_loc.unsqueeze(-1) * torch.ones(
                n_pr, n_knots_coef)
            scale_temp = torch.diag_embed(
                pr_init_knot_scale.unsqueeze(-1) *
                torch.ones(n_pr, n_knots_coef))
            pr_knot = pyro.sample(
                "pr_knot",
                dist.MultivariateNormal(
                    loc=loc_temp, covariance_matrix=scale_temp).to_event(1))
            # exponentiate to enforce positivity
            pr_knot = torch.exp(pr_knot)
            pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(-2, -1)
    # concatenating all latent variables
    coef_init_knot = torch.zeros(n_rr + n_pr)
    coef_knot = torch.zeros((n_rr + n_pr, n_knots_coef))
    coef = torch.zeros(n_obs)
    if n_pr > 0 and n_rr > 0:
        coef_knot = torch.cat([rr_knot, pr_knot], dim=-2)
        coef_init_knot = torch.cat([rr_init_knot, pr_init_knot], dim=-1)
        coef = torch.cat([rr_coef, pr_coef], dim=-1)
    elif n_pr > 0:
        coef_knot = pr_knot
        coef_init_knot = pr_init_knot
        coef = pr_coef
    elif n_rr > 0:
        coef_knot = rr_knot
        coef_init_knot = rr_init_knot
        coef = rr_coef
    # coefficients likelihood/priors: user-supplied windowed priors on coef
    coef_prior_list = self.coef_prior_list
    if coef_prior_list:
        for x in coef_prior_list:
            name = x['name']
            # TODO: we can move torch conversion to init to enhance speed
            m = torch.tensor(x['prior_mean'])
            sd = torch.tensor(x['prior_sd'])
            # tp = torch.tensor(x['prior_tp_idx'])
            # idx = torch.tensor(x['prior_regressor_col_idx'])
            start_tp_idx = x['prior_start_tp_idx']
            end_tp_idx = x['prior_end_tp_idx']
            idx = x['prior_regressor_col_idx']
            pyro.sample("prior_{}".format(name),
                        dist.Normal(m, sd).to_event(2),
                        obs=coef[..., start_tp_idx:end_tp_idx, idx])
    # observation likelihood
    yhat = lev + (regressors * coef).sum(-1)
    obs_scale_base = pyro.sample("obs_scale_base",
                                 dist.Beta(2, 2)).unsqueeze(-1)
    # from 0.5 * sdy to sdy
    obs_scale = ((obs_scale_base *
                  (1.0 - min_residuals_sd)) + min_residuals_sd) * sdy
    # with pyro.plate("response_plate", n_valid):
    #     pyro.sample("response",
    #                 dist.StudentT(dof, yhat[..., which_valid], obs_scale),
    #                 obs=response_tran[which_valid])
    pyro.sample("response",
                dist.StudentT(dof, yhat[..., which_valid],
                              obs_scale).to_event(1),
                obs=response_tran[which_valid])
    # undo the mean-shift for reporting
    lev_knot = lev_knot_tran + meany
    extra_out.update({
        'yhat': yhat + seas_term + meany,
        'lev': lev + meany,
        'lev_knot': lev_knot,
        'coef': coef,
        'coef_knot': coef_knot,
        'coef_init_knot': coef_init_knot,
        'obs_scale': obs_scale,
    })
    return extra_out
def model():
    """Sample site "x" from a StudentT under the given plate stack.

    ``df``, ``loc``, ``scale`` and ``shape`` come from the enclosing scope.
    """
    studentt = dist.StudentT(df, loc, scale)
    with pyro.plate_stack("plates", shape):
        return pyro.sample("x", studentt)
def model():
    """Draw 20000 parallel StudentT samples at site "x".

    ``df``, ``loc`` and ``scale`` come from the enclosing scope.
    """
    particle_plate = pyro.plate("particles", 20000)
    with particle_plate:
        return pyro.sample("x", dist.StudentT(df, loc, scale))