def make_model(cls):
    with pm.Model() as model:
        sd_mu = np.array([1, 2, 3, 4, 5])
        sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, shape=5)
        chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
        cov = tt.dot(chol, chol.T)
        stds = tt.sqrt(tt.diag(cov))
        pm.Deterministic("log_stds", tt.log(stds))
        corr = cov / stds[None, :] / stds[:, None]
        corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
        pm.Deterministic("corr_entries_unit", corr_entries_unit)
    return model
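A minimal usage sketch (an assumption, not part of the original source): `cls` is unused inside `make_model`, so any placeholder works, and sampling recovers draws of the derived deterministics.

# Hypothetical usage sketch for the factory above; cls is unused, so None suffices.
model = make_model(None)
with model:
    trace = pm.sample(draws=500, tune=500, chains=2)
print(trace["log_stds"].shape)           # (1000, 5): one log-std per dimension
print(trace["corr_entries_unit"].shape)  # (1000, 10): strict lower triangle of a 5x5 matrix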
def test_sample_prior_and_posterior(self):
    def build_toy_dataset(N, K):
        pi = np.array([0.2, 0.5, 0.3])
        mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
        stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
        x = np.zeros((N, 3), dtype=np.float32)
        y = np.zeros((N,), dtype=int)  # np.int is deprecated; use the builtin
        for n in range(N):
            k = np.argmax(np.random.multinomial(1, pi))
            x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
            y[n] = k
        return x, y

    N = 100  # number of data points
    K = 3    # number of mixture components
    D = 3    # dimensionality of the data
    X, y = build_toy_dataset(N, K)

    with pm.Model() as model:
        pi = pm.Dirichlet("pi", np.ones(K), shape=(K,))
        comp_dist = []
        mu = []
        packed_chol = []
        chol = []
        for i in range(K):
            mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
            packed_chol.append(
                pm.LKJCholeskyCov("chol_cov_%i" % i, eta=2, n=D, sd_dist=pm.HalfNormal.dist(2.5))
            )
            chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
            comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))
        pm.Mixture("x_obs", pi, comp_dist, observed=X)

    with model:
        trace = pm.sample(30, tune=10, chains=1)

    n_samples = 20
    with model:
        ppc = pm.sample_posterior_predictive(trace, n_samples)
        prior = pm.sample_prior_predictive(samples=n_samples)

    assert ppc["x_obs"].shape == (n_samples,) + X.shape
    assert prior["x_obs"].shape == (n_samples,) + X.shape
    assert prior["mu0"].shape == (n_samples, D)
    assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
def Arodz(x0, x1):
    """Takes in two sample sets, one from each class, and returns
    the MAP estimates of the means and covariance.
    """
    numberOfFeatures = len(x0[0])

    # instantiate an empty PyMC3 model
    basic_model = pm.Model()

    # fill the model with details:
    with basic_model:
        # parameters for priors for gaussian means
        mu_prior_cov = 100 * np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures,))

        # Priors for gaussian means (Gaussian prior): mu1 ~ N(mu_prior_mu, mu_prior_cov), mu0 ~ N(mu_prior_mu, mu_prior_cov)
        mu1 = pm.MvNormal('estimated_mu1', mu=mu_prior_mu, cov=mu_prior_cov, shape=numberOfFeatures)
        mu0 = pm.MvNormal('estimated_mu0', mu=mu_prior_mu, cov=mu_prior_cov, shape=numberOfFeatures)

        # Prior for gaussian covariance matrix (LKJ prior):
        # see here for details: http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
        # and here: http://docs.pymc.io/notebooks/LKJ.html
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=numberOfFeatures)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=numberOfFeatures, eta=2, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
        cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

        # observations x1, x0 are supposed to be P(x|y=class1)=N(mu1,cov_both), P(x|y=class0)=N(mu0,cov_both)
        # here is where the dataset (x1, x0) comes to influence the choice of parameters (mu1, mu0, cov_both)
        # this is done through the "observed = ..." argument; note that above we didn't have that
        x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
        x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)

    # done with setting up the model

    # now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation
    # map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
    map_estimate1 = pm.find_MAP(model=basic_model)
    # print(map_estimate1)

    return map_estimate1['estimated_mu0'], map_estimate1['estimated_mu1'], map_estimate1['estimated_cov']
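A quick smoke test of `Arodz` on synthetic two-class data (hypothetical shapes and parameters, not part of the original source):

# Hypothetical smoke test: two classes sharing one covariance matrix.
rng = np.random.RandomState(0)
cov_true = np.array([[1.0, 0.3], [0.3, 1.0]])
x0 = rng.multivariate_normal([0.0, 0.0], cov_true, size=100)
x1 = rng.multivariate_normal([1.0, 1.0], cov_true, size=100)
mu0_hat, mu1_hat, cov_hat = Arodz(x0, x1)
print(mu0_hat, mu1_hat)  # MAP means, near [0, 0] and [1, 1]
print(cov_hat)           # MAP estimate of the shared covariance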
def covariation():
    with pm.Model(coords=coords) as m_covariation:
        # Inputs
        idx_ = pm.Data('idx_shared', idx_train, dims=('idx', 't'))
        t_ = pm.Data('t_shared', t_train, dims=('idx', 't'))

        # prior stddev in intercepts & slopes (variation across counties):
        sd_dist = pm.HalfNormal.dist(0.5)

        # get back standard deviations and rho:
        # eta = 1: uniform (higher --> more weight on low correlations)
        # n = 2: number of predictors
        chol, corr, stds = pm.LKJCholeskyCov("chol", n=2, eta=2, sd_dist=sd_dist, compute_corr=True)

        # priors for mean effects
        alpha = pm.Normal("alpha", mu=1.5, sigma=0.5)
        beta = pm.Normal("beta", mu=0, sigma=0.5)

        # population of varying effects
        alpha_beta = pm.MvNormal("alpha_beta", mu=tt.stack([alpha, beta]), chol=chol, dims=("idx", "param"))

        # expected value per participant at each time-step
        mu = alpha_beta[idx_, 0] + alpha_beta[idx_, 1] * t_

        # model error
        sigma = pm.HalfNormal("sigma", sigma=0.5)

        # likelihood
        y_pred = pm.Normal("y_pred", mu=mu, sigma=sigma, observed=y_train, dims=('idx', 't'))

    # return the model
    return m_covariation
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))
    with pm.Model() as gmm:
        # Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        # Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d,))
            # testval=pm.floatX(np.ones(d)))
            for i in range(K)
        ]

        # Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i, n=d, eta=2., sd_dist=pm.HalfCauchy.dist(1))
            # testval=pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]

        # Unpack packed_L into full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]

        # Convert L to sigma and tau for convenience
        sigma = [pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T)) for i in range(K)]
        tau = [pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i])) for i in range(K)]

        # Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)
    return gmm
def _multivariate_normal_dist(self, init_mu, suffix=""):
    if not isinstance(suffix, str):
        suffix = str(suffix)
    data_dim = len(init_mu)
    # prior of covariance
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov('cov' + suffix, eta=2, n=data_dim, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(data_dim, packed_chol, lower=True)
    # prior of mean
    mu = pm.MvNormal('mu' + suffix, mu=0, cov=np.eye(data_dim), shape=data_dim)
    return pm.MvNormal.dist(mu, chol=chol)
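Because `_multivariate_normal_dist` registers named variables (`'cov' + suffix`, `'mu' + suffix`), it has to run inside an open model context; a hedged sketch, assuming `obj` is an instance of the owning class and `observed_data` is a hypothetical (n, 2) array:

# Hedged sketch: obj and observed_data are assumptions, not from the original source.
with pm.Model() as model:
    comp0 = obj._multivariate_normal_dist(np.zeros(2), suffix=0)
    comp1 = obj._multivariate_normal_dist(np.zeros(2), suffix=1)
    # the returned unnamed distributions can serve, e.g., as mixture components:
    w = pm.Dirichlet("w", a=np.ones(2))
    pm.Mixture("obs", w=w, comp_dists=[comp0, comp1], observed=observed_data)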
def test_mv_missing_data_model(self):
    data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)

    model = pm.Model()
    with model:
        mu = pm.Normal("mu", 0, 1, size=2)
        sd_dist = pm.HalfNormal.dist(1.0)
        chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
        y = pm.MvNormal("y", mu=mu, chol=chol, observed=data)
        inference_data = pm.sample(100, chains=2, return_inferencedata=True)

    # make sure that data is really missing
    assert isinstance(y.owner.op, AdvancedIncSubtensor)

    test_dict = {
        "posterior": ["mu", "chol_cov"],
        "observed_data": ["y"],
        "log_likelihood": ["y"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_mv_missing_data_model(self):
    data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)

    model = pm.Model()
    with model:
        mu = pm.Normal("mu", 0, 1, shape=2)
        sd_dist = pm.HalfNormal.dist(1.0)
        chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
        pm.MvNormal("y", mu=mu, chol=chol, observed=data)
        trace = pm.sample(100, chains=2)

    # make sure that data is really missing
    (y_missing,) = model.missing_values
    assert y_missing.tag.test_value.shape == (4,)

    inference_data = from_pymc3(trace=trace, model=model)
    test_dict = {
        "posterior": ["mu", "chol_cov"],
        "observed_data": ["y"],
        "log_likelihood": ["y"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def __init__(self, data, sigma, mu_prior=[0.0, 1000.], sigma_prior=200.):
    self.fitted = False
    if np.any(sigma <= 0.):
        raise ValueError("Uncertainties must be positive real numbers!")
    self.plot_trace_vars = ['mu', "chol_corr"]
    if data is None:
        raise ValueError("Either data must be given as input, or x and y")
    else:
        self.ndim = data.shape[1]
        self.npoints = data.shape[0]
        self.data = data
    if data.shape != sigma.shape:
        raise RuntimeError("data and sigma must have the same shape!")
    self.sigma = sigma

    self.model = pm.Model()
    with self.model:
        # we put weakly informative hyperpriors on the means and standard deviations
        # of the multivariate normal distribution
        mu = pm.Normal("mu", mu=mu_prior[0], sigma=mu_prior[1], shape=self.ndim)
        sigma = pm.HalfCauchy.dist(sigma_prior)
        # and a hyperprior on the covariance matrix which weakly penalises strong correlations
        chol, corr, stds = pm.LKJCholeskyCov("chol", n=self.ndim, eta=2.0, sd_dist=sigma, compute_corr=True)
        # the hyperprior gives us the Cholesky decomposition of the covariance matrix,
        # so for completeness we can calculate that deterministically
        cov = pm.Deterministic("cov", chol.dot(chol.T))
        # and now we can construct our multivariate normals to complete the prior
        prior = pm.MvNormal('vals', mu=mu, chol=chol, shape=(self.npoints, self.ndim))  # , observed=self.data)
        # print(prior)
        # help(prior)
        mu1s = prior[:, 0]
        # Finally, we need to define our data
        datavars = pm.Normal("data", mu=prior, sigma=self.sigma, observed=self.data)
        # for i in range(self.ndim):
        #     datavars.append(pm.Normal("data_" + str(i), mu=prior[:, i],
        #                               sigma=self.sigma[:, i], observed=self.data[:, i]))
        print(datavars)
def sample_LKJ_prior(nu=2, shape=2, n_samples=200000):
    r"""
    Sample the LKJ prior.

    Parameters
    ----------
    nu : float
        LKJ prior \nu parameter.
    shape : int
        Dimensionality of the covariance matrix.
    n_samples : int
        Number of samples drawn from the prior.

    Returns
    -------
    r : numpy-array, shape (n_samples, )
        MCMC samples.
    """
    with pm.Model() as model_correlation:
        # generate a sample of standard deviations
        sd_dist = pm.Gamma.dist(alpha=2, beta=1, shape=2)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=shape, eta=nu, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(shape, chol_packed)
        vals = pm.MvNormal('true_quantities', mu=0.0, chol=chol, shape=(1, shape))

    with model_correlation:
        # sample from the prior model (default sampler)
        trace = pm.sample(n_samples, chains=2)

    r = []
    # iterate directly over the packed Cholesky draws (the original wrapped this
    # in zip(), which yields 1-tuples instead of arrays)
    for chol_p in trace['chol_packed']:
        cov = make_cov_mtx_from_chol_vec(chol_p, ndim=shape)
        r += [cov[1, 0] / np.sqrt(cov[0, 0] * cov[1, 1])]
    return r
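`make_cov_mtx_from_chol_vec` is not shown above; a plausible reconstruction (a hypothetical helper matching the call site, not the original implementation) unpacks the row-wise packed lower triangle and forms L Lᵀ:

# Hypothetical helper matching the call site above (not the original implementation).
def make_cov_mtx_from_chol_vec(chol_vec, ndim):
    # PyMC3 packs the lower triangle row-wise, matching np.tril_indices order.
    L = np.zeros((ndim, ndim))
    L[np.tril_indices(ndim)] = np.asarray(chol_vec).ravel()
    return L.dot(L.T)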
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples, n_feats = data.shape
        # print(n_samples, n_feats)
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2., sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        # mus = pm.Normal('mus', mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]], sd=10, shape=(K, n_feats))
        mus = pm.Normal('mus', mu=mus, sd=10., shape=(K, n_feats), testval=data.mean(axis=0))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], chol=L, observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    # if chains > 1:
    #     print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
def __init__(self, n_to_sample=2000, *args, **kwargs):
    super(MvStudentTBayesianSolver, self).__init__(*args, **kwargs)
    self.n_to_sample = n_to_sample
    self.model = pm.Model()
    self.shared_data = theano.shared(np.zeros((5, 5)) * 0.5, borrow=True)

    with self.model:
        sd_dist = pm.Gamma.dist(alpha=3.0, beta=1.0)
        # sd_dist = pm.HalfCauchy.dist(beta=2.5)
        packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=5, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(5, packed_chol, lower=True)
        cov = pm.Deterministic('cov', theano.dot(chol, chol.T))
        self.mu_dist = pm.MvNormal("mu", mu=np.zeros(5), chol=chol, shape=5)
        observed = pm.MvStudentT('obs', nu=3.5, mu=self.mu_dist, chol=chol, observed=self.shared_data)
        self.step = pm.Metropolis()
        [0.0, -0.06, 1.0, -0.04],
        [0.15, 0.19, -0.04, 1.0],
    ]
)
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))
dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1, shape=n_var)

    # Note that we access the distribution for the standard
    # deviations, and do not create a new random variable.
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist)

    # compute the covariance matrix
    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # Extract the standard deviations etc
    sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
    r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])

    like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
def build(self):
    with pm.Model() as env_model:
        # Generate region weights
        w_r = pm.MvNormal('w_r', mu=self.prior.loc_w_r, tau=self.prior.scale_w_r, shape=self.n_regions)

        # Generate product weights
        # packed_L_p = pm.LKJCholeskyCov('packed_L_p', n=self.n_products,
        #                                eta=2., sd_dist=pm.HalfCauchy.dist(2.5))
        # L_p = pm.expand_packed_triangular(self.n_products, packed_L_p)
        mu_p = pm.MvNormal("mu_p", mu=self.prior.loc_w_p, cov=np.eye(self.n_products), shape=self.n_products)
        # w_p = pm.MvNormal('w_p', mu=mu_p, chol=L_p, shape=self.n_products)
        w_p = pm.MvNormal('w_p', mu=mu_p, cov=self.prior.scale_w_p, shape=self.n_products)

        # Generate previous sales weight
        loc_w_s = pm.HalfCauchy('loc_w_s', 1.0)
        scale_w_s = pm.HalfCauchy('scale_w_s', 2.5)
        w_s = pm.TruncatedNormal('w_s', mu=loc_w_s, sigma=scale_w_s, lower=0.0)

        # Generate temporal weights
        packed_L_t = pm.LKJCholeskyCov('packed_L_t', n=self.n_temporal_features,
                                       eta=2., sd_dist=pm.HalfCauchy.dist(2.5))
        L_t = pm.expand_packed_triangular(self.n_temporal_features, packed_L_t)
        mu_t = pm.MvNormal("mu_t", mu=self.prior.loc_w_t, cov=self.prior.scale_w_t, shape=self.n_temporal_features)
        w_t = pm.MvNormal('w_t', mu=mu_t, chol=L_t, shape=self.n_temporal_features)
        lambda_c_t = pm.math.dot(self.X_temporal, w_t.T)

        bias_q_loc = pm.Normal('bias_q_loc', mu=0.0, sigma=1.0)
        bias_q_scale = pm.HalfCauchy('bias_q_scale', 5.0)
        bias_q = pm.Normal("bias_q", mu=bias_q_loc, sigma=bias_q_scale)

        if self.log_linear:
            lambda_q = pm.math.exp(bias_q + lambda_c_t[self.time_stamps] +
                                   pm.math.dot(self.X_region, w_r.T) +
                                   pm.math.dot(self.X_product, w_p.T) +
                                   w_s * self.X_lagged)
        else:
            lambda_q = (bias_q + lambda_c_t[self.time_stamps] +
                        pm.math.dot(self.X_region, w_r.T) +
                        pm.math.dot(self.X_product, w_p.T) +
                        w_s * self.X_lagged)

        sigma_q_ij = pm.InverseGamma("sigma_q_ij", alpha=self.prior.loc_sigma_q_ij,
                                     beta=self.prior.scale_sigma_q_ij)
        q_ij = pm.TruncatedNormal('quantity_ij', mu=lambda_q, sigma=sigma_q_ij,
                                  lower=0.0, observed=self.y)

    return env_model
X = X.apply(standardize, axis=0)

# mask NA
X_masked = np.ma.masked_invalid(X)

# model
with pm.Model() as model:
    # priors
    intercept = pm.Normal('intercept', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=100, shape=X_masked.shape[1])
    kappa = pm.HalfCauchy('kappa', beta=5)

    # impute missing X
    # note: with compute_corr=True, LKJCholeskyCov returns (chol, corr, stds) in that order
    chol, corr, stds = pm.LKJCholeskyCov('chol', n=X_masked.shape[1], eta=2,
                                         sd_dist=pm.Exponential.dist(1), compute_corr=True)
    cov = pm.Deterministic('cov', chol.dot(chol.T))
    X_mu = pm.Normal('X_mu', mu=0, sigma=100, shape=X_masked.shape[1],
                     testval=X_masked.mean(axis=0))
    X_modeled = pm.MvNormal('X', mu=X_mu, chol=chol, observed=X_masked)

    # observation
    mu_ = intercept + tt.dot(X_modeled, beta)

    # likelihood
    mu = pm.math.invlogit(mu_)
def MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, output_folder):
    # This is the data preprocessing part
    n = np.shape(DataField)[0]  # number of measured data
    m = np.shape(DataComp)[0]   # number of simulation data

    p = np.shape(DataField)[1] - n_y      # number of input x
    q = np.shape(DataComp)[1] - p - n_y   # number of calibration parameters t

    xc = DataComp[:, n_y:]   # simulation input x + calibration parameters t
    xf = DataField[:, n_y:]  # observed input

    yc = DataComp[:, :n_y]   # simulation output
    yf = DataField[:, :n_y]  # observed output

    x_pred = DataPred[:, n_y:]  # design points for predictions
    y_true = DataPred[:, :n_y]  # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0]  # number of predictions
    N = n + m + n_pred

    # Put points xc, xf, and x_pred on [0,1]
    for i in range(p):
        x_min = min(min(xc[:, i]), min(xf[:, i]))
        x_max = max(max(xc[:, i]), max(xf[:, i]))
        xc[:, i] = (xc[:, i] - x_min) / (x_max - x_min)
        xf[:, i] = (xf[:, i] - x_min) / (x_max - x_min)
        x_pred[:, i] = (x_pred[:, i] - x_min) / (x_max - x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p, (p + q)):
        t_min = min(xc[:, i])
        t_max = max(xc[:, i])
        xc[:, i] = (xc[:, i] - t_min) / (t_max - t_min)

    # store mean and std of yc for scaling back later
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:, i])
        yc_sd[i] = np.std(yc[:, i])
        yc[:, i] = (yc[:, i] - yc_mean[i]) / yc_sd[i]
        yf[:, i] = (yf[:, i] - yc_mean[i]) / yc_sd[i]

    # This is the modeling part
    with pm.Model() as model:
        # Declare the priors
        eta1 = pm.HalfCauchy("eta1", beta=5)  # for eta of the gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p + q))  # for lengthscale of the gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q)  # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5)  # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred, n_y))  # for y prediction

        # Set up the prior of the right Cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2, sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate the data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n, q]), tf)], axis=1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred, q]), tf)], axis=1)
        X = tt.concatenate([xf1, xc, x_pred1], axis=0)
        # Concatenate the outputs into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis=0)

        # Covariance function of the gaussian process
        cov_z = eta1 ** 2 * pm.gp.cov.ExpQuad((p + q), ls=lengthscale)

        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func=cov_z)

        # Bayesian inference
        matrix_shape = [n + m + n_pred, n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol,
                                         noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250, cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y' + str(j + 1) + '_pred' + str(i + 1))
    # 250 draws over the default 2 chains give 500 posterior samples
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(500, n_pred * n_y),
                                columns=name_columns)

    # Draw the picture of the cvrmse distribution and calculate the indices
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:, index])
        y_prediction1 = y_prediction1 * yc_sd[i] + yc_mean[i]  # Scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred' + str(i + 1) + '.csv')  # Store y_prediction

        # Calculate the distribution of cvrmse
        cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction1 - y_true[:, i]), axis=1) / n_pred) / np.mean(y_true[:, i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1, y_true[:, i]).to_csv(output_folder + '/index' + str(i + 1) + '.csv')
        # Draw the picture of the cvrmse distribution of each y
        plt.subplot(n_y, 1, i + 1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # Draw the prediction plot
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction_mean = np.array(pm.summary(trace)['mean'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index]) * yc_sd[i] + yc_mean[i]

        plt.subplot(n_y, 1, i + 1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred), y=y_true[:, i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # This just prints the original cvrmse to check whether the outcome is good
        if i == 0:
            cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction_mean - y_true[:, 0])) / len(y_prediction_mean - y_true[:, 0])) / np.mean(y_true[:, 0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
import numpy as np
import pymc3 as pm
from matplotlib import pyplot as plt

if __name__ == "__main__":
    with pm.Model():
        mu = np.zeros(3)
        true_cov = np.array([[1.0, 0.5, 0.1],
                             [0.5, 2.0, 0.2],
                             [0.1, 0.2, 1.0]])
        data = np.random.multivariate_normal(mu, true_cov, 100)
        print(data)
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3)
        chol_packed = pm.LKJCholeskyCov("chol_packed", n=3, eta=2, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(3, chol_packed)
        vals = pm.MvNormal("vals", mu=mu, chol=chol, observed=data)
        trace = pm.sample()
        pm.traceplot(trace)
        plt.savefig("tmp.png")
def print_rv(a, b):
    tt.printing.Print(a)(b)


with pm.Model() as model:
    # Generalization of beta prior
    p = pm.Dirichlet('p', a=np.ones(4), shape=4)
    # Generalization of Binomial prior
    cc = pm.Categorical('c', p=p, shape=2)
    # prior for covariance matrix
    packed_L1 = pm.LKJCholeskyCov('packed_L1', n=2, eta=2., sd_dist=pm.HalfCauchy.dist(6))
    # Convert to a lower-triangular matrix
    L1 = pm.expand_packed_triangular(2, packed_L1)
    # Calculate covariance
    cov1 = pm.Deterministic('cov1', L1.dot(L1.T))

    # print distribution outputs
    print_rv('p', p)
    print_rv('cc', cc)
    print_rv('packed_L1', packed_L1)

'''
Model 1 Output:
for i in range(3):
    betas = pm.Normal(name=f"betas_{i}", sd=2.5, shape=1, testval=0)
    pi = pm.math.sigmoid(pm.math.matrix_dot(X, betas))
    pm.Bernoulli(name=f"Y_{i}", p=pi, observed=Y[:, i])
trace = pm.sample(12000, tune=2000)
print(pm.summary(trace))

# Bayesian multivariate LVM
with pm.Model():
    B = pm.Normal(name="B", sd=2.5, shape=B.shape, testval=0)
    Mu = pm.math.matrix_dot(X, B)

    # Prior on the correlation matrix ----------------------------------------------
    f = pm.Lognormal.dist(sd=1)
    L = pm.LKJCholeskyCov(name="L", eta=1, n=3, sd_dist=f)
    ch = pm.expand_packed_triangular(3, L, lower=True)
    cov = pm.math.matrix_dot(ch, ch.T)
    sd = tt.sqrt(tt.diag(cov))
    Theta = pm.Deterministic("Theta", cov / sd[:, None] / sd[None, :])
    # ------------------------------------------------------------------------------

    Psi = pm.MvNormal(name="Psi", mu=Mu, cov=Theta, shape=Y.shape)
    Pi = pm.math.sigmoid(Psi)
    pm.Bernoulli(name="Y", p=Pi, observed=Y)
    trace = pm.sample(15000, tune=5000)
    print(pm.summary(trace, var_names=["B", "Theta"]))
def bsem(
    items,
    factors,
    paths,
    beta=0,
    nu_sd=2.5,
    alpha_sd=2.5,
    d_beta=2.5,
    corr_items=False,
    corr_factors=False,
    g_eta=100,
    l_eta=1,
    beta_beta=1,
):
    r"""Constructs Bayesian SEM.

    Args:
        items (np.array): Array of item data.
        factors (np.array): Factor design.
        paths (np.array): Array of directed factor paths.
        beta (:obj:`float` or `'estimate'`, optional): Standard deviation of normal
            prior on cross loadings. If `'estimate'`, beta is estimated from the data.
        nu_sd (:obj:`float`, optional): Standard deviation of normal prior on item
            intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of normal prior on
            factor intercepts.
        d_beta (:obj:`float`, optional): Scale parameter of half-Cauchy prior on
            factor standard deviation.
        corr_factors (:obj:`bool`, optional): Allow correlated factors.
        corr_items (:obj:`bool`, optional): Allow correlated items.
        g_eta (:obj:`float`, optional): Shape parameter of LKJ prior on residual
            item correlation matrix.
        l_eta (:obj:`float`, optional): Shape parameter of LKJ prior on factor
            correlation matrix.
        beta_beta (:obj:`float`, optional): Beta parameter of beta prior on beta.

    Returns:
        None: Places model in context.
    """
    # get numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"
    I = tt.eye(m, m)

    # place priors on item and factor intercepts
    nu = pm.Normal(name=r"$\nu$", mu=0, sd=nu_sd, shape=p, testval=items.mean(axis=0))
    alpha = pm.Normal(name=r"$\alpha$", mu=0, sd=alpha_sd, shape=m, testval=np.zeros(m))

    # place priors on unscaled factor loadings
    Phi = pm.Normal(name=r"$\Phi$", mu=0, sd=1, shape=factors.shape, testval=factors)

    # place priors on paths
    B = tt.zeros(paths.shape)
    npths = np.sum(paths, axis=None)
    print(npths)
    if npths > 0:
        b = pm.Normal(name=r"$b$", mu=0, sd=1, shape=npths, testval=np.ones(npths))
        # create the paths matrix
        k = 0
        for i in range(m):
            for j in range(m):
                if paths[i, j] == 1:
                    B = tt.set_subtensor(B[i, j], b[k])
                    k += 1
    Gamma = pm.Deterministic(r"$\Gamma$", B)

    # create masking matrix for factor loadings
    if isinstance(beta, str):
        assert beta == "estimate", f"Don't know what to do with '{beta}'"
        beta = pm.Beta(name=r"$\beta$", alpha=1, beta=beta_beta, testval=0.1)
    M = (1 - np.asarray(factors)) * beta + np.asarray(factors)

    # create scaled factor loadings
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * M)

    # determine item means
    mu = nu + matrix_dot(Lambda, alpha)

    # place priors on item standard deviations
    D = pm.HalfCauchy(name=r"$D$", beta=d_beta, shape=p, testval=items.std(axis=0))

    # place priors on item correlations
    f = pm.Lognormal.dist(sd=0.25)
    if not corr_items:
        Omega = np.eye(p)
    else:
        G = pm.LKJCholeskyCov(name=r"$G$", eta=g_eta, n=p, sd_dist=f)
        ch1 = pm.expand_packed_triangular(p, G, lower=True)
        K = tt.dot(ch1, ch1.T)
        sd1 = tt.sqrt(tt.diag(K))
        Omega = pm.Deterministic(r"$\Omega$", K / sd1[:, None] / sd1[None, :])

    # determine residual item variances and covariances
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # place priors on factor correlations
    if not corr_factors:
        Psi = np.eye(m)
    else:
        L = pm.LKJCholeskyCov(name=r"$L$", eta=l_eta, n=m, sd_dist=f)
        ch = pm.expand_packed_triangular(m, L, lower=True)
        # use a fresh name here so the paths matrix Gamma is not overwritten
        K2 = tt.dot(ch, ch.T)
        sd = tt.sqrt(tt.diag(K2))
        Psi = pm.Deterministic(r"$\Psi$", K2 / sd[:, None] / sd[None, :])

    # determine variances and covariances of items
    A = matrix_inverse(I - Gamma)
    C = matrix_inverse(I - Gamma.T)
    Sigma = matrix_dot(Lambda, A, Psi, C, Lambda.T) + Theta

    # place priors on observations
    pm.MvNormal(name="$Y$", mu=mu, cov=Sigma, observed=items, shape=items.shape)
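Since `bsem` only places variables in the ambient model ("Places model in context"), it must be called inside an open model block; a minimal sketch with hypothetical arrays:

# Hedged usage sketch with made-up data: 6 items, 2 factors, one directed path.
items = np.random.randn(100, 6)
factors = np.kron(np.eye(2), np.ones((3, 1)))  # simple structure: 3 items per factor
paths = np.array([[0, 0], [1, 0]])             # factor 1 -> factor 2
with pm.Model():
    bsem(items, factors, paths)
    trace = pm.sample(1000, tune=1000)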
_, ax = plt.subplots(1, 1, figsize=(5, 5))
textloc = [[0, 0.5], [0, 0.8], [0.5, 0.9]]
for eta, loc in zip([1, 2, 4], textloc):
    R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000)
    az.plot_kde(R)
    ax.text(loc[0], loc[1], "eta = %s" % (eta), horizontalalignment="center")
ax.set_ylim(0, 1.1)
ax.set_xlabel("correlation")
ax.set_ylabel("Density")

# %%
cafe_idx = d["cafe"].values
with pm.Model() as m_13_1:
    sd_dist = pm.HalfCauchy.dist(beta=2)
    packed_chol = pm.LKJCholeskyCov("chol_cov", eta=2, n=2, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
    cov = pm.math.dot(chol, chol.T)
    sigma_ab = pm.Deterministic("sigma_cafe", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sigma_ab ** -1).dot(cov.dot(tt.diag(sigma_ab ** -1)))
    r = pm.Deterministic("Rho", corr[np.triu_indices(2, k=1)])

    ab = pm.Normal("ab", mu=0, sd=10, shape=2)
    ab_cafe = pm.MvNormal("ab_cafe", mu=ab, chol=chol, shape=(N_cafes, 2))

    mu = ab_cafe[:, 0][cafe_idx] + ab_cafe[:, 1][cafe_idx] * d["afternoon"].values
    sd = pm.HalfCauchy("sigma", beta=2)
    wait = pm.Normal("wait", mu=mu, sd=sd, observed=d["wait"])
if k > 1:
    # Prior over component weights (only applicable with k > 1)
    p = pm.Dirichlet('p', a=np.array([1.] * k), testval=np.ones(k) / k)

# Prior over component means
mus_p = [
    pm.MvNormal('mu_%d' % pid,
                mu=pm.floatX(np.zeros(2)),
                tau=pm.floatX(0.1 * np.eye(2)),
                shape=(k, 2))
    for pi, pid in enumerate(data.keys())
]

# Cholesky decomposed LKJ prior over component covariance matrices
packed_L = [[
    pm.LKJCholeskyCov('packed_L_%d_%d' % (pid, i), n=2, eta=2., sd_dist=pm.HalfCauchy.dist(.01))
    for i in range(k)
] for pi, pid in enumerate(data.keys())]

# Unpack packed_L into full array
L = [[
    pm.expand_packed_triangular(2, packed_L[pi][i]) for i in range(k)
] for pi, pid in enumerate(data.keys())]

# Convert L to sigma for convenience
sigma = [[
    pm.Deterministic('sigma_%d_%d' % (pid, i), L[pi][i].dot(L[pi][i].T))
    for i in range(k)
] for pi, pid in enumerate(data.keys())]
    'paziente8': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
    'paziente9': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
    'paziente10': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
    'paziente11': [0, 1, 1, 1, 0, 0, 1, 1, 1, 0],
    'paziente12': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
    'paziente13': [0, 1, 1, 1, 0, 0, 1, 1, 1, 0]
})

# %%
model = pm.Model()
sigma0 = np.array([[1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0],
                   [0.0, 0.0, 1.0]])
SEED = 12345

with model:
    packed_L = pm.LKJCholeskyCov('Packed_L', n=len(dati), eta=2., sd_dist=pm.HalfCauchy.dist(1))
    L = pm.expand_packed_triangular(len(dati), packed_L)
    # sigma = pm.Wishart("Sigma", nu=1, V=sigma0, shape=(3, 3))
    sigma = pm.Deterministic('Sigma', L.dot(L.T))
    # mu = pm.Normal('mu', 0., 10., shape=3, testval=dati.mean(axis=1))
    logits = pm.MvNormal("Logits", mu=0.5 * np.ones(len(dati)), cov=sigma, shape=(1, len(dati)))
    p = pm.Deterministic('p', tt.exp(logits) / (1 + tt.exp(logits)))
    observed = pm.Binomial("Observed", p=p, n=1, observed=dati.values.T)
    trace = pm.sample(random_seed=SEED, cores=1)
## BET: FORMULATE THE MODEL
mvg_model = pm.Model()
with mvg_model:
    # bias parametrization
    x = pm.Uniform('x', lower=0., upper=55., shape=(N, 1))
    b0 = pm.Normal('b0', mu=0., sd=20., shape=M)
    b1 = pm.Normal('b1', mu=1., sd=0.5, shape=M)
    b2 = pm.Normal('b2', mu=0., sd=1. / 55., shape=M)
    xxx = pm.math.concatenate([x for _ in range(M)], axis=1)  # that list comprehension is just a "repmat"
    mu = xxx * xxx * b2 + xxx * b1 + b0

    # covariance parametrization
    # sd_dist = TruncatedJeff.dist(shape=M)
    sd_template = pm.Bound(Jeff, lower=0.001, upper=55.)
    sd_dist = sd_template.dist(shape=M, testval=1.)
    chol_packed = pm.LKJCholeskyCov('chol_packed', n=M, eta=100000., sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(M, chol_packed)

    # data connection
    y = pm.MvNormal('y', mu=mu, chol=chol, shape=(N, M), observed=data)

## GIMEL: SAMPLE
with mvg_model:
    # step = pm.Slice()
    trace = pm.sample(nsamples_aug, njobs=njobs, tune=2000)

## DALET: SAVE
# with open('trace', 'wb') as f:
#     pickle.dump(trace, f)

## WAW: EXPORT IN A DIGESTIBLE FORMAT
def bayesian_linear_model(data, N_steps=20000, step="Metropolis", burnin=None,
                          njobs=1, progressbar=True, chain_start=0,
                          output_format="rho", sample_params={}):
    """Bayesian estimation of the correlation between two variables.

    :data: The data used; expects a 2-d array with one dimension having 2 columns/rows.
    :N_steps: The number of steps in each chain. If using NUTS sampling, this can be smaller.
    :step: The sampling method, either "Metropolis" (faster sampling, but needs more steps) or "NUTS" (slower, but fewer steps).
    :burnin: Number of steps to discard at the beginning of each chain. If None, half of N_steps is discarded.
    :njobs: The number of parallel jobs.
    :chain_start: The number assigned to the chain. Can be useful when aiming to combine different chains.
    :progressbar: Should a progressbar be shown during sampling?
    :output_format: What should be returned from the sampling? If "rho", only a numpy array with the correlation values is returned. If "full", the whole multitrace is returned, which is useful for convergence analysis.
    :sample_params: Additional parameters for pymc3's sample function.
    :returns: Either a multitrace or a numpy array, depending on output_format.
    """
    # test the data for the right format and transform it if necessary/possible
    if isinstance(data, list):
        try:
            data = np.vstack(data)
        except ValueError:
            print("Error: Data dimensions do not match!")
            return None
    if isinstance(data, np.ndarray):
        if len(data.shape) != 2:
            if len(data) == 2 and len(data[0]) != len(data[1]):
                print("Error: Data dimensions do not match!")
                return None
            else:
                print("Error: Data not a two-dimensional array, don't know what to do!")
                return None
        else:
            if data.shape[1] != 2:
                if data.shape[0] != 2:
                    print("Error: No dimension with 2 variables present, don't know what to do!")
                else:
                    data = data.T

    # if no burnin is specified, use half of the step number
    if burnin is None:
        burnin = N_steps // 2  # integer division so pm.sample gets an int

    sample_params.update({
        "draws": N_steps,
        "njobs": njobs,
        "tune": burnin,
        "progressbar": progressbar
    })

    # initialize model
    basic_model = pm.Model()
    with basic_model:
        # define model priors
        m_mu = pm.Normal('mu', mu=0., sd=10, shape=2)
        nu = pm.Uniform('nu', 0, 5)
        packed_L = pm.LKJCholeskyCov('packed_L', n=2, eta=nu, sd_dist=pm.HalfCauchy.dist(1.))
        chol = pm.expand_packed_triangular(2, packed_L)
        sigma = pm.Deterministic('sigma', _Cov2Cor(chol.dot(chol.T)))

        # model likelihood
        mult_n = pm.MvNormal('mult_n', mu=m_mu, chol=chol, observed=data)

        # which sampler to use
        if step == "Metropolis":
            step = pm.Metropolis()
        elif step == "NUTS":
            step = pm.NUTS()
        sample_params.update({"step": step})

        # MCMC sample
        trace = pm.sample(**sample_params)

    # Return the full pymc3 trace or only an array of the correlation?
    if output_format == "full":
        output = trace
    else:
        output = trace["sigma"][:, 0, 1]
    return output
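A hedged usage example on synthetic correlated data (made-up numbers, assuming the module-level helper `_Cov2Cor` is available):

# Hypothetical usage: estimate the correlation of bivariate normal data.
rng = np.random.RandomState(42)
cov = np.array([[1.0, 0.6], [0.6, 1.0]])
data = rng.multivariate_normal([0.0, 0.0], cov, size=200)
rho = bayesian_linear_model(data, N_steps=2000, step="Metropolis")
print(rho.mean())  # posterior mean correlation, expected near 0.6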
    # xxx*xxx*b2
    mu = bn[0]
    for k in range(1, K):
        mu = mu + bn[k] * xxx ** k

    # covariance parametrization
    sd_template = pm.Bound(Jeff, lower=0.01, upper=np.max(data.max(axis=0) - data.min(axis=0)))
    sd_dist = sd_template.dist(shape=M, testval=testval['S'])
    # sd_dist = pm.HalfCauchy.dist(1., shape=M, testval=1.)
    # sd_dist = pm.Uniform.dist(lower=0., upper=data.max(axis=0) - data.min(axis=0), shape=M)
    chol_packed = pm.LKJCholeskyCov('chol_packed', n=M, eta=1., sd_dist=sd_dist,
                                    testval=testval['chol_packed'])
    chol = pm.expand_packed_triangular(M, chol_packed)

    # data connection
    y = pm.MvNormal('y', mu=mu, chol=chol, shape=(N, M), observed=data)
    # mvg_model.logp()

with mvg_model:
    if conf.approx:
        approx = pm.fit(method=conf.approx)
        trace = approx.sample(conf.nsamples_per_chain)
        save_state(trace, tracedir, mvg_model, approx, sca)
        exit(0)
    step = pm.NUTS()
# Multi-Level Model
# A frequentist HLM indicated that condition does not have an effect, so we don't implement that as a level here.
# But honestly, because I don't know how.
with pm.Model(coords=coords) as model:  # block, user, direction. Based on m14_3, Statistical Rethinking 2.
    user_idx = pm.Data("user_idx", user_indices, dims="obs_id")
    block_idx = pm.Data("block_idx", block_indices, dims="obs_id")
    direction_idx = pm.Data("direction_idx", direction_indices, dims="obs_id")

    # Fixed priors.
    g = pm.Normal("g", mu=0.0, sd=1.0, dims='Direction')
    sd_dist = pm.Exponential.dist(1.0)
    chol_user, _, _ = pm.LKJCholeskyCov("chol_user", n=n_directions, eta=4,
                                        sd_dist=sd_dist, compute_corr=True)
    chol_block, _, _ = pm.LKJCholeskyCov("chol_block", n=n_directions, eta=4,
                                         sd_dist=sd_dist, compute_corr=True)

    # Adaptive priors, non-centered.
    z_user = pm.Normal("z_user", 0.0, 1.0, dims=('Direction', 'User'))
    alpha = pm.Deterministic("alpha", pm.math.dot(chol_user, z_user))
    z_block = pm.Normal("z_block", 0.0, 1.0, dims=('Direction', 'Block'))
    beta = pm.Deterministic("beta", pm.math.dot(chol_block, z_block))

    theta = pm.Deterministic(
def create_model(self):
    """
    Creates and returns the PyMC3 model.

    Note: The size of the shared variables must match the size of the
    training data. Otherwise, setting the shared variables later will
    raise an error. See http://docs.pymc.io/advanced_theano.html

    The DensityDist class is used as the likelihood term. The second
    argument, logp_gmix(mus, pi, np.eye(D)), is a python function which
    receives observations (denoted by 'value') and returns the tensor
    representation of the log-likelihood.

    Returns
    -------
    the PyMC3 model
    """
    model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred]))

    # model_output = theano.shared(np.zeros(self.num_training_samples))

    # model_truncate = theano.shared(np.zeros(self.num_training_samples,
    #                                         dtype='int'))

    self.shared_vars = {
        'model_input': model_input
        # ,
        # 'model_output': model_output,
        # 'model_truncate': model_truncate
    }

    # Log likelihood of normal distribution
    # def logp_normal(mu, tau, value):
    #     # log probability of individual samples
    #     k = tau.shape[0]
    #
    #     def delta(mu):
    #         return value - mu
    #
    #     # delta = lambda mu: value - mu
    #     return (-1 / 2.) * (k * T.log(2 * np.pi) + T.log(1. / det(tau)) +
    #                         (delta(mu).dot(tau) * delta(mu)).sum(axis=1))

    # Log likelihood of Gaussian mixture distribution
    # def logp_gmix(mus, pi, tau):
    #     def logp_(value):
    #         logps = [T.log(pi[i]) + logp_normal(mu, tau, value)
    #                  for i, mu in enumerate(mus)]
    #
    #         return T.sum(
    #             logsumexp(T.stacklists(logps)[:, :self.num_training_samples],
    #                       axis=0))
    #
    #     return logp_

    def stick_breaking(v):
        portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - v)[:-1]])
        return v * portion_remaining

    model = pm.Model()

    with model:
        K = self.num_truncate
        D = self.num_pred

        alpha = pm.Gamma('alpha', 1.0, 1.0)
        v = pm.Beta('v', 1, alpha, shape=K)
        pi_ = stick_breaking(v)
        pi = pm.Deterministic('pi', pi_ / pi_.sum())

        means = tt.stack([
            pm.Uniform('cluster_center_{}'.format(k), lower=0., upper=10., shape=D)
            for k in range(K)
        ])
        lower = tt.stack([
            pm.LKJCholeskyCov('cluster_variance_{}'.format(k), n=D, eta=2.,
                              sd_dist=pm.HalfNormal.dist(sd=1.))
            for k in range(K)
        ])
        chol = tt.stack([pm.expand_packed_triangular(D, lower[k]) for k in range(K)])

        component_dists = [
            pm.MvNormal('component_dist_%d' % k, mu=means[k], chol=chol[k], shape=D)
            for k in range(K)
        ]

        # rand = [pm.MvNormal(
        #     'rand_{}'.format(k),
        #     mu=means[k], chol=Chol[k], shape=D) for k in range(K)]
        rand = pm.Normal.dist(0, 1).random

        X = pm.DensityDist(
            'X',
            logp_gmix(
                mus=component_dists,
                pi=pi,
                tau=np.eye(D),
                num_training_samples=model_input.get_value().shape[0]),
            observed=model_input,
            random=rand)

    return model
minibatch_size = 500
X_minibatch = pm.Minibatch(X, minibatch_size)

# set up model
with pm.Model() as model:
    pi = pm.Dirichlet('pi', np.ones(K))
    comp_dist = []
    mu = []
    packed_chol = []
    chol = []
    for i in range(K):
        temp_mean = np.random.randint(low=50, high=200, size=D)
        mu.append(pm.Normal('mu%i' % i, temp_mean, 20, shape=D))
        packed_chol.append(
            pm.LKJCholeskyCov('chol_cov_%i' % i, eta=2, n=D, sd_dist=pm.HalfNormal.dist(10))
        )
        chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
        comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

    xobs = pm.Mixture('x_obs', pi, comp_dist, observed=X_shared)

print("making inference...")
# Inference
with model:
    advi_mf = pm.ADVI()
    advi_mf.fit(10000,
                more_replacements={X_shared: X_minibatch},
                obj_optimizer=pm.adagrad(learning_rate=1e-2))

fig = plt.figure()
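For this snippet to run, `X_shared` is assumed to be a theano shared variable wrapping the full dataset; a hypothetical setup for the undefined names (X, K, D, X_shared), mirroring the `more_replacements` call above:

# Hypothetical setup, not part of the original snippet.
import numpy as np
import theano

X = np.random.rand(5000, 3) * 150 + 50  # made-up data in the 50-200 range
K, D = 3, X.shape[1]
X_shared = theano.shared(X)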