def test_vae():
    """Run one ``pm.fit`` step with encoder-driven local variational parameters.

    The latent ``zs`` receives its ``mu``/``rho`` from a linear encoder of the
    symbolic input ``x_inp``; ``x_inp`` is swapped for a minibatch of the data
    through ``more_replacements`` and the encoder/decoder shared variables are
    optimised via ``more_obj_params``.
    """
    batch = 10
    observations = pm.floatX(np.random.rand(100))
    minibatches = pm.Minibatch(observations, batch)

    x_inp = tt.vector()
    x_inp.tag.test_value = observations[:batch]

    # Encoder parameters (ae, be) and decoder parameters (ad, bd).
    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))
    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    encoded = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu = encoded[:, 0]
    rho = encoded[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sd=1, shape=batch)
        decoded = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=decoded, sd=0.1, observed=x_inp)
        pm.fit(
            1,
            local_rv={zs: dict(mu=mu, rho=rho)},
            more_replacements={x_inp: minibatches},
            more_obj_params=[ae, be, ad, bd],
        )
def test_vae():
    """Smoke-test amortised inference: a single ``pm.fit`` iteration.

    An affine encoder of ``x_inp`` supplies ``mu`` and ``rho`` for the local
    random variable ``zs``; the symbolic input is replaced by a minibatch and
    the four shared parameters are added to the objective's parameters.
    """
    n_batch = 10
    raw = pm.floatX(np.random.rand(100))
    x_batches = pm.Minibatch(raw, n_batch)

    x_inp = tt.vector()
    x_inp.tag.test_value = raw[:n_batch]

    enc_weight = theano.shared(pm.floatX([.1, .1]))
    enc_bias = theano.shared(pm.floatX(1.))
    dec_weight = theano.shared(pm.floatX(1.))
    dec_bias = theano.shared(pm.floatX(1.))

    # One row per observation, one column per variational parameter.
    enc_out = x_inp.dimshuffle(0, 'x') * enc_weight.dimshuffle('x', 0) + enc_bias
    local_params = dict(mu=enc_out[:, 0], rho=enc_out[:, 1])

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sigma=1, shape=n_batch)
        # Observation model
        pm.Normal('xs_', mu=zs * dec_weight + dec_bias, sigma=0.1,
                  observed=x_inp)
        pm.fit(
            1,
            local_rv={zs: local_params},
            more_replacements={x_inp: x_batches},
            more_obj_params=[enc_weight, enc_bias, dec_weight, dec_bias],
        )
def test_expressions(expr):
    """Check that ``expr`` builds a (10, 10) value backed by three free RVs.

    ``expr`` is called with the target shape inside a fresh model; the result
    is used as observed data for a Normal, and one ``fit`` step is run.
    """
    expected_shape = (10, 10)
    with Model() as current:
        built = expr(expected_shape)
        Normal('obs', observed=built)
        assert built.tag.test_value.shape == expected_shape
        assert len(current.free_RVs) == 3
        fit(1)
def sample_chain(model, chain_i=0, step=None, num_samples=MAX_NUM_SAMPLES,
                 advi=False, tune=5, discard_tuned_samples=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """Sample single chain from constructed Bayesian model.

    Parameters
    ----------
    model : context manager entered with ``with model`` (a PyMC3 model).
    chain_i : chain index forwarded to ``pm.iter_sample``.
    step : step method; when ``None`` one is created with ``pm.init_nuts``.
    num_samples : number of draws requested after the discarded ones.
    advi : when true, fit with ADVI instead of sampling with NUTS.
    tune : number of leading draws dropped when ``discard_tuned_samples``.
    discard_tuned_samples : whether the first ``tune`` draws are dropped.
    num_scale1_iters, num_scale0_iters : iteration counts of the two ADVI
        fits run in the ``advi`` branch.

    Returns
    -------
    The collected trace (NUTS, with the discarded prefix removed) or a trace
    sampled from the ADVI approximation.
    """
    start = timer()
    with model:
        if advi:
            # ADVI for neural networks
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            # NOTE(review): this call does not pass ``method=vi``, so it looks
            # like it starts a fresh default fit instead of continuing the run
            # whose gradient scale was just set to 0 -- confirm intent.
            approx = pm.fit(n=num_scale0_iters)
            # one sample to get dimensions of trace
            trace = approx.sample(draws=1)
            min_num_samples = get_min_samples_per_chain(
                len(trace.varnames), MIN_SAMPLES_CONSTANT, 1)
            trace = approx.sample(draws=min_num_samples)
            return trace

        pm._log.info('Assigning NUTS sampler...')
        # Previously ``start_`` was only bound when ``step`` was None, so a
        # caller-supplied step crashed with NameError below.
        start_ = None
        if step is None:
            start_, step = pm.init_nuts(init='advi', njobs=1,
                                        n_init=NUM_INIT_STEPS,
                                        random_seed=-1, progressbar=False)

        discard = tune if discard_tuned_samples else 0
        sampler = pm.iter_sample(num_samples + discard, step,
                                 start=start_, chain=chain_i)
        for i, trace in enumerate(sampler):
            if i == 0:
                # The first draw tells us how many variables the trace holds.
                min_num_samples = get_min_samples_per_chain(
                    len(trace[0]), MIN_SAMPLES_CONSTANT, NUM_CHAINS)
            elapsed = timer() - start
            if elapsed > SOFT_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                print('exceeded soft time limit...')
                if i + 1 - discard >= min_num_samples:
                    print('collected enough samples; stopping')
                    break
                else:
                    print('but only collected {} of {}; continuing...'.
                          format(i + 1 - discard, min_num_samples))
            if elapsed > HARD_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                print('exceeded HARD time limit; STOPPING')
                break
        return trace[discard:]
def test_discrete_not_allowed():
    """``pm.fit`` must raise ``ParametrizationError`` for a discrete latent.

    The model is a three-component mixture whose component index ``z`` is a
    Categorical variable.
    """
    component_means = np.array([-2, 0, 2])
    assignments = np.random.randint(len(component_means), size=100)
    observed = np.random.normal(component_means[assignments],
                                np.ones_like(assignments))
    uniform_p = tt.ones(3) / 3

    with pm.Model():
        mu = pm.Normal('mu', mu=0, sd=10, shape=3)
        z = pm.Categorical('z', p=uniform_p, shape=len(observed))
        pm.Normal('y_obs', mu=mu[z], sd=1., observed=observed)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
def test_discrete_not_allowed():
    """Fitting a model with a Categorical latent raises ParametrizationError."""
    mu_true = np.array([-2, 0, 2])
    n_components = len(mu_true)
    z_true = np.random.randint(n_components, size=100)
    noise_scale = np.ones_like(z_true)
    y = np.random.normal(mu_true[z_true], noise_scale)

    with pm.Model():
        centers = pm.Normal('mu', mu=0, sigma=10, shape=3)
        labels = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y))
        pm.Normal('y_obs', mu=centers[labels], sigma=1., observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
def main():
    """Fit the configured network with ADVI, print a summary, then sample PPC.

    Loads the configuration and training dataset, wraps the inputs in shared
    variables, fits for 50000 ADVI iterations, draws 5000 samples, and finally
    draws 500 posterior-predictive samples with the training data set again on
    the shared variables.
    """
    config = create_configuration(filename='/regression-siso.json')
    dataset = get_dataset(config.dataset, testing=False)

    x_train, y_train = dataset.x, dataset.y
    x = theano.shared(x_train)
    y = theano.shared(y_train)
    nn = construct_nn(x=x, y=y, config=config)

    # ADVI
    with nn:
        approx = pm.fit(n=50000, method=pm.ADVI())
        trace = approx.sample(draws=5000)

    print(pm.summary(trace))

    x.set_value(x_train)
    y.set_value(y_train)
    with nn:
        pm.sample_ppc(trace, samples=500, progressbar=False)
def _build_BPF(self):
    """Build, fit (ADVI) and posterior-predict the Bernoulli bias model.

    Stores shared copies of the training arrays on ``self``, defines the model
    as ``self.bncf``, keeps 500 draws of the fitted approximation in
    ``self.trace`` and the posterior-predictive mean of ``Y`` in
    ``self.by_r_ui``.
    """
    print('start building the Bayesian probabilistic model')
    self.x_u = theano.shared(self.train_u)
    self.x_i = theano.shared(self.train_i)
    self.y_r = theano.shared(self.train_r)
    self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
    n_targets = len(self.y_r.get_value())
    n_offsets = len(self.y_r_ui.get_value())
    assert (n_targets == n_offsets)

    with pm.Model() as self.bncf:
        # Priors
        b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
        b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
        u = pm.Normal('u', 0, sd=1)
        # Linear predictor: user bias + item bias + fixed offset + global term
        bias_sum = tt.add(b_u[self.x_u], b_i[self.x_i])
        with_offset = tt.add(bias_sum, self.y_r_ui)
        tY = pm.Deterministic('tY', tt.add(with_offset, u))
        nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
        # Likelihood of observed data
        pm.Bernoulli('Y', nY, observed=self.y_r)

    with self.bncf:
        # Inference
        fitted = pm.fit(n=1000, method=pm.ADVI())
        self.trace = fitted.sample(draws=500)
        # Posterior prediction
        predictive = pm.sample_posterior_predictive(self.trace,
                                                    progressbar=True)
        self.by_r_ui = predictive['Y'].mean(axis=0)
    print('done building the Bayesian probabilistic model')
def _sample(self, num_epochs=None, num_draws=None):
    """Fit ``self.model`` with Adam-optimised VI and store sampled draws.

    Falsy ``num_epochs`` / ``num_draws`` fall back to ``self.num_epochs`` /
    ``self.num_draws``. The resulting trace is stored in ``self.trace``.
    """
    epochs = num_epochs if num_epochs else self.num_epochs
    draws = num_draws if num_draws else self.num_draws

    with self.model:
        optimizer = pm.adam(learning_rate=self.learning_rate)
        fitted = pm.fit(n=epochs, obj_optimizer=optimizer)
        self.trace = fitted.sample(draws=draws)
def fit(self, fast_sampling=True, sample_size=3000):
    """Build the spatial random-effects model and draw posterior samples.

    With ``fast_sampling`` the posterior is approximated by 50000 ADVI
    iterations and ``sample_size`` draws are taken from the approximation;
    otherwise ``pm.sample`` is used. The trace is stored in ``self.trace`` and
    credible intervals for ``beta`` and ``tau`` are reported.
    """
    with pm.Model() as self.model:
        beta = pm.Normal('beta', mu=0.0, tau=1.0, shape=(self.dim + 1, 1))

        # Priors for spatial random effects
        tau = pm.Gamma('tau', alpha=2., beta=2.)
        alpha = pm.Uniform('alpha', lower=0, upper=1)
        precision = tau * (self.D - alpha * self.weight_matrix)
        phi = pm.MvNormal('phi', mu=0, tau=precision, shape=(1, self.N))

        # Mean model
        linear_part = tt.dot(self.covariates, beta)
        mu = pm.Deterministic('mu', linear_part + phi.T)
        theta_sd = pm.Gamma('theta_sd', alpha=1.0, beta=1.0)

        # Likelihood
        pm.Normal('Yi', mu=mu.ravel(), tau=theta_sd,
                  observed=self.response_var)

        if not fast_sampling:
            self.trace = pm.sample(sample_size, cores=2, tune=1000)
        else:
            approx = pm.fit(n=50000, method=pm.ADVI())  # until converge
            self.trace = approx.sample(draws=sample_size)

        for variable in ('beta', 'tau'):
            self._report_credible_interval(self.trace, variable)
def fit(self, draws=500, chains=4, trace_size=500, method='NUTS',
        map_initialization=False, finalize=True, step_kwargs=None,
        sample_kwargs=None):
    """Fit the PMProphet model.

    Parameters
    ----------
    draws : int, > 0
        The number of MCMC samples per chain (or, for ADVI, the number of
        iterations passed to ``pm.fit``).
    chains : int, =4
        The number of MCMC chains.
    trace_size : int, =500
        The last N number of samples to keep in the trace
    method : 'NUTS' or 'Metropolis' or 'ADVI'.
    map_initialization : bool
        Initialize the model with maximum a posteriori estimates.
    finalize : bool
        Finalize the model.
    step_kwargs : dict, optional
        Additional arguments for the sampling algorithms
        (`NUTS` or `Metropolis`).
    sample_kwargs : dict, optional
        Additional arguments for the PyMC3 `sample` function.

    Returns
    -------
    The fitted PMProphet object.

    Raises
    ------
    ValueError
        If ``chains * draws`` is smaller than ``trace_size`` for a
        non-ADVI method.
    """
    # ``None`` defaults avoid sharing one dict object between calls.
    step_kwargs = {} if step_kwargs is None else step_kwargs
    sample_kwargs = {} if sample_kwargs is None else sample_kwargs

    uses_advi = method == 'ADVI'
    if chains * draws < trace_size and not uses_advi:
        raise ValueError(
            "Desired trace size should be smaller than the sampled data points")

    self.skip_first = 0 if uses_advi else (chains * draws) - trace_size
    self.chains = chains

    if finalize:
        self.finalize_model()

    with self.model:
        if map_initialization:
            self.start = pm.find_MAP(maxeval=10000)
            if draws == 0:
                # No sampling requested: expose the MAP point as a trace.
                self.trace = {k: np.array([v]) for k, v in self.start.items()}

        if draws:
            start = self.start if map_initialization else None
            if method in ('NUTS', 'Metropolis'):
                if method == 'Metropolis':
                    step = pm.Metropolis(**step_kwargs)
                else:
                    step = pm.NUTS(**step_kwargs)
                self.trace = pm.sample(
                    draws,
                    chains=chains,
                    step=step,
                    start=start,
                    **sample_kwargs
                )
            else:
                res = pm.fit(draws, start=start)
                self.trace = res.sample(trace_size)

    return self
def fit(self, sampling_size=5000, fast_sample=False):
    """Build the latent-GP regression model and draw posterior samples.

    Parameters
    ----------
    sampling_size : int
        Number of draws stored in ``self.trace``.
    fast_sample : bool
        When true, approximate the posterior with 25000 ADVI iterations and
        sample from the approximation; otherwise run ``pm.sample`` with four
        chains and 10000 tuning steps.

    Side effects: sets ``self.model``, ``self.gp``, ``self.beta_list`` and
    ``self.trace``.
    """
    with pm.Model() as self.model:
        rho = pm.Exponential('rho', 1/5, shape=self.dim_gp)
        # 'tau' is not referenced below but is still registered on the model.
        pm.Exponential('tau', 1/3)

        cov_func = pm.gp.cov.Matern52(self.dim_gp, ls=rho)
        self.gp = pm.gp.Latent(cov_func=cov_func)
        mean_func = self.gp.prior("f", X=self.locations)

        # One Normal coefficient per covariate, added to the GP mean.
        self.beta_list = []
        if self.covariates:
            for i, covariate in enumerate(self.covariates):
                beta = pm.Normal('_'.join(['beta', str(i)]), mu=0, sd=50)
                self.beta_list.append(beta)
                mean_func = mean_func + beta * covariate

        sigma = pm.HalfNormal('sigma', sd=20)
        pm.Normal('Y', mu=mean_func, sd=sigma, observed=self.response)

        if fast_sample:
            inference = pm.ADVI()
            approx = pm.fit(n=25000, method=inference)  # until converge
            self.trace = approx.sample(draws=sampling_size)
        else:
            # The former ``pm.find_MAP()`` call was dropped: its result was
            # never passed to the sampler. ``chains`` is the documented
            # keyword of ``pm.sample``; ``nchains`` was its deprecated alias.
            self.trace = pm.sample(sampling_size, tune=10000, chains=4)
def run_factorization(self):
    """Fit the beta-binomial matrix factorization with ADVI and save results.

    Builds the model from attributes on ``self`` (counts, covariates, initial
    values), runs 1000 ADVI iterations, pickles the approximation to
    ``<output_root>_model`` and writes the fitted means of each variable plus
    the ELBO history to tab-delimited ``<output_root>_temper_*.txt`` files.
    """
    rat = self.allelic_counts/self.total_counts
    # Entries whose ratio is NaN are excluded from the likelihood.
    nans = np.isnan(rat)

    def save(suffix, values):
        # All outputs share the same format and delimiter.
        np.savetxt(self.output_root + suffix, values, fmt="%s", delimiter='\t')

    # Run bb-mf
    with pm.Model() as bb_glm:
        CONC = pm.HalfCauchy('CONC', beta=5, shape=(1, self.S),
                             testval=self.conc_init)
        BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0),
                         shape=(self.S, self.num_cov), testval=self.beta_init)
        U = pm.Normal('U', mu=0, tau=(1.0/100.0), shape=(self.N, self.K),
                      testval=self.U_init)
        V = pm.Normal('V', mu=0, tau=(1.0/100.0), shape=(self.S, self.K),
                      testval=self.V_init)

        MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1, self.S),
                         testval=self.mu_a_init)
        SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1, self.S),
                                testval=self.sigma_a_init)
        # Repeat the per-column hyperparameters across the I rows.
        mu_a_mat = pm.math.dot(np.ones((self.I, 1)), MU_A)
        sigma_a_mat = pm.math.dot(np.ones((self.I, 1)), SIGMA_A)
        A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat,
                      shape=(self.I, self.S), testval=self.A_init)

        p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T)
                             + pm.math.dot(U, V.T) + A[self.Z, :])
        conc_mat = pm.math.dot(np.ones((self.N, 1)), CONC)
        pm.BetaBinomial('like',
                        alpha=(p*conc_mat)[~nans],
                        beta=((1.0-p)*conc_mat)[~nans],
                        n=self.total_counts[~nans],
                        observed=self.allelic_counts[~nans])

        approx = pm.fit(method='advi', n=1000)

        # Context manager closes the file; previously the handle was leaked.
        with open(self.output_root + '_model', 'wb') as model_file:
            pickle.dump(approx, model_file)
        # approx = pickle.load(open(self.output_root + '_model', "rb"))

        means_dict = approx.bij.rmap(approx.params[0].eval())
        save('_temper_U.txt', (means_dict['U']))
        save('_temper_V.txt', (means_dict['V'].T))
        save('_temper_BETA.txt', (means_dict['BETA'].T))
        # HalfCauchy variables are stored under '<name>_log__'; exponentiate.
        save('_temper_CONC.txt', np.exp(means_dict['CONC_log__']))
        save('_temper_A.txt', (means_dict['A']))
        save('_temper_MU_A.txt', (means_dict['MU_A']))
        save('_temper_SIGMA_A.txt', np.exp(means_dict['SIGMA_A_log__']))
        save('_temper_ELBO.txt', approx.hist)
def fit(self, X, Y, samples=500, advi_n=50000, advi_n_mc=1,
        advi_obj_optimizer=pm.adam(learning_rate=.1)):
    """Build the model for ``X``/``Y`` and infer its posterior.

    Parameters
    ----------
    X, Y : data forwarded to ``self._build_model``.
    samples : number of draws stored in ``self.trace`` (also used as the
        number of tuning steps for MCMC).
    advi_n : number of ADVI iterations.
    advi_n_mc : ``obj_n_mc`` passed to ``pm.fit``.
    advi_obj_optimizer : optimizer passed to ``pm.fit``.

    Raises
    ------
    ValueError
        If ``self.inference_method`` is neither ``'advi'`` nor ``'mcmc'``.
    """
    self.num_samples = samples
    self._build_model(X, Y)

    with self.model:
        if self.inference_method == 'advi':
            mean_field = pm.fit(
                n=advi_n,
                method='advi',
                obj_n_mc=advi_n_mc,
                obj_optimizer=advi_obj_optimizer
            )  # TODO: how to determine hyperparameters?
            self.trace = mean_field.sample(draws=samples)
        elif self.inference_method == 'mcmc':
            self.trace = pm.sample(samples, tune=samples)
        else:
            # The old message reported ``self.output`` and listed output
            # families, which is unrelated to the value checked here.
            raise ValueError(
                "Unknown inference method: %s. Choose among 'advi', 'mcmc'."
                % self.inference_method)
def run_factorization(self, N, S, X, K, num_cov, k, n):
    """Fit the beta-binomial matrix factorization with ADVI and save results.

    Parameters
    ----------
    N, S : dimensions used for the shapes of ``U`` (N x K) and ``V`` (S x K).
    X : covariate matrix multiplied with ``BETA.T``.
    K : number of latent factors.
    num_cov : number of covariates (columns of ``BETA``).
    k, n : observed counts and totals; entries where ``k/n`` is NaN are
        excluded from the likelihood.

    Pickles the approximation to ``<output_root>_model`` and writes the fitted
    means of ``U``, ``V`` and ``BETA`` to tab-delimited text files.
    """
    # Smart initialization
    rat = k/n
    nans = np.isnan(rat)
    conc_inits = np.zeros((1, S))
    beta_inits = np.zeros((num_cov, S))
    for index_s in range(S):
        column_rat = rat[:, index_s]
        valid_rat = column_rat[~np.isnan(column_rat)]
        # Concentration from the inverse variance, capped at 1000.
        conc_inits[0, index_s] = min(1.0/np.var(valid_rat), 1000.0)
        # Mean clipped away from 0 and 1 so its logit stays finite.
        m_init = min(max(np.mean(valid_rat), 1.0/1000), 1.0-(1.0/1000))
        beta_inits[0, index_s] = np.log(m_init/(1.0-m_init))

    def save(suffix, values):
        np.savetxt(self.output_root + suffix, values, fmt="%s", delimiter='\t')

    # Run bb-mf
    with pm.Model() as bb_glm:
        CONC = pm.Gamma('CONC', alpha=1e-4, beta=1e-4, shape=(1, S),
                        testval=conc_inits)
        BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(S, num_cov),
                         testval=beta_inits.T)
        U = pm.Normal('U', mu=0, tau=(1/1000.0), shape=(N, K),
                      testval=np.random.randn(N, K))
        V = pm.Normal('V', mu=0, tau=(1/1000.0), shape=(S, K),
                      testval=np.random.randn(S, K))

        p = pm.math.invlogit(pm.math.dot(X, BETA.T) + pm.math.dot(U, V.T))
        conc_mat = pm.math.dot(np.ones((N, 1)), CONC)
        pm.BetaBinomial('like',
                        alpha=(p*conc_mat)[~nans],
                        beta=((1.0-p)*conc_mat)[~nans],
                        n=n[~nans],
                        observed=k[~nans])

        approx = pm.fit(method='advi', n=30000)

        # Context manager closes the file; previously the handle was leaked.
        with open(self.output_root + '_model', 'wb') as model_file:
            pickle.dump(approx, model_file)
        # approx = pickle.load(open(self.output_root + '_model', "rb"))

        means_dict = approx.bij.rmap(approx.params[0].eval())
        save('_temper_U.txt', (means_dict['U']))
        save('_temper_V.txt', (means_dict['V'].T))
        save('_temper_BETA.txt', (means_dict['BETA'].T))
def fit_expected_successes_per_action_model(xS, attempts):
    """Estimate a hierarchical beta model for expected-success rates.

    Parameters
    ----------
    xS : numpy array of shape (num_players,)
        Total expected successes for each player (e.g. xG, xA).
    attempts : numpy array of shape (num_players,)
        Total number of attempted actions for each player (e.g. shots,
        key passes).

    Returns
    -------
    list ``[sl, sb, kk, 'expected']`` where

    * ``sl`` has shape (6000, N): posterior samples of the per-player success
      probabilities ``lambdas`` (N = players with non-zero attempts and
      non-zero expected successes),
    * ``sb`` has shape (6000, 3): posterior samples of the two ``v``
      'sample size' parameters followed by the population-level mean ``mu``,
    * ``kk`` is the boolean mask selecting those N players.
    """
    import numpy as np
    import pymc3 as pm

    kk = (attempts > 0) & (xS > 0)
    sp = xS[kk] / attempts[kk]  # expected successes per action
    attempts = attempts[kk]
    N = attempts.shape[0]

    with pm.Model() as model:
        v = pm.HalfNormal('v', shape=2, sigma=100)
        mu = pm.Uniform('mu')
        lambdas = pm.Beta('lambdas', alpha=mu * v[0], beta=(1 - mu) * v[0],
                          shape=N)
        obs_size = attempts * (v[1] + 1) - 1
        pm.Beta('y', alpha=lambdas * obs_size, beta=(1 - lambdas) * obs_size,
                observed=sp)
        approx = pm.fit(n=30000)

        # Draw once and reuse: the original sampled three separate sets of
        # 6000 draws, one per variable.
        draws = approx.sample(6000)

    sl = draws['lambdas']
    sb = np.c_[draws['v'], draws['mu']]
    return [sl, sb, kk, 'expected']
def fit_counts_model(counts, mins_played):
    """Estimate a hierarchical Poisson model for count data.

    Parameters
    ----------
    counts : numpy array of shape (num_players,)
        Total number of actions completed by each player (across all games).
    mins_played : numpy array of shape (num_players,)
        Total number of minutes each player was observed for.

    Returns
    -------
    list ``[sl, sb, kk, 'count']`` where

    * ``sl`` has shape (6000, N): posterior samples of the per-minute rates
      ``lambdas`` multiplied by 90 (N = players with minutes played and a
      finite count),
    * ``sb`` has shape (6000, 2): posterior samples of ``beta`` and of the
      population-level mean ``mu``,
    * ``kk`` is the boolean mask selecting those N players.
    """
    import numpy as np
    import pymc3 as pm

    kk = (mins_played > 0) & np.isfinite(counts)
    mins_played = mins_played[kk]
    counts = counts[kk]
    N = counts.shape[0]

    with pm.Model() as model:
        beta = pm.HalfNormal('beta', sigma=100)
        mu = pm.HalfFlat('mu')
        lambdas = pm.Gamma('lambdas', alpha=mu * beta, beta=beta, shape=N)
        # Expected count = per-minute rate times minutes observed.
        lambda_tilde = lambdas * mins_played
        pm.Poisson('y', lambda_tilde, observed=counts)
        approx = pm.fit(n=30000)

        # Draw once and reuse: the original sampled three separate sets of
        # 6000 draws, one per variable.
        draws = approx.sample(6000)

    sl = draws['lambdas'] * 90
    sb = np.c_[draws['beta'], draws['mu']]
    return [sl, sb, kk, 'count']
def fit_vi(self):
    """Fit the model with ADVI, then sample the fit and its predictions.

    Results are stored on ``self``: the approximation in ``trace_vi``, draws
    from it in ``trace_vi_samples`` and 500 posterior-predictive samples of
    ``y_pred`` in ``pred_samples_fit`` (back-transformed to the original scale
    unless minibatching is enabled).
    """
    self.likelihood_fn()

    # VI in PyMC3 approximates the objective gradients with Monte Carlo
    # samples, so the fit is stochastic and the ELBO is not monotone; the
    # returned fit is that of the last iteration, which may not be the best.
    # Increasing obj_n_mc (number of MC samples used for the gradient
    # approximation) mitigates this.
    # The callback stops early once the parameters have converged.
    convergence = pm.callbacks.CheckParametersConvergence(
        every=50, diff='absolute', tolerance=1e-3)

    with self.model:
        print('Fitting model...')
        self.trace_vi = pm.fit(
            self.n_iterations,
            method='advi',
            obj_n_mc=1,
            obj_optimizer=pm.adamax(),
            callbacks=[convergence],
        )

        print('Sampling...')
        self.trace_vi_samples = self.trace_vi.sample()
        self.pred_samples_fit = pm.sample_posterior_predictive(
            self.trace_vi_samples, vars=[self.y_pred], samples=500)

        if not self.minibatch:
            # with minibatch there is no possibility to recover the fitted
            # values; otherwise back-transform the sampled fit to the
            # original scale
            fitted = self.pred_samples_fit['y_pred']
            self.pred_samples_fit = self.dt.inv_transf_train_general(fitted)
def fit_models(models, method='NUTS', verbose=True, n_samples=2000,
               n_advi=200000, **kwargs):
    """Fit one or several PyMC3 models and return their traces.

    Parameters
    ----------
    models : pymc3 Model or list of Models
    method : 'NUTS' or 'ADVI'
        'NUTS' calls ``pm.sample``; 'ADVI' calls ``pm.fit`` for ``n_advi``
        iterations and samples ``n_samples`` draws from the approximation.
    verbose : bool
        Print progress messages.
    n_samples : int
        Number of draws per model.
    n_advi : int
        Number of ADVI iterations.
    **kwargs
        Forwarded to ``pm.sample`` / ``pm.fit``.

    Returns
    -------
    A single trace when one model was fitted, otherwise a list of traces.

    Raises
    ------
    ValueError
        If ``method`` is not supported or ``models`` has the wrong type.
    """
    # Reject unknown methods up front: previously they fell through both
    # branches and ``traces.append(trace)`` hit an unbound (or stale) trace.
    if method not in ('NUTS', 'ADVI'):
        raise ValueError(
            "Unknown method {!r}; choose 'NUTS' or 'ADVI'.".format(method))

    if isinstance(models, pm.model.Model):
        models = [models]
    elif not (isinstance(models, list)
              and all(isinstance(model, pm.model.Model) for model in models)):
        raise ValueError(
            "Models must be list of <pymc3.model.Model> instances.")

    if verbose:
        print('Fitting {} model(s) using {}...'.format(len(models), method))

    traces = []
    for m, model in enumerate(models):
        if verbose:
            print('  Fitting model {} of {}...'.format(m + 1, len(models)))
        with model:
            if method == 'NUTS':
                trace = pm.sample(draws=n_samples, **kwargs)
            else:
                vi_est = pm.fit(n=n_advi, **kwargs)
                trace = vi_est.sample(n_samples)
            traces.append(trace)

    if len(traces) == 1:
        return traces[0]
    else:
        return traces
def VINormal(dim, const_str, const_fx, K, nfit=30000):
    """\
    Full-rank Normal sampling, fit with ADVI to a distribution defined by
    penalty potentials.

    :input dim: The dimensionality
    :input const_str: Constraint strings; each is evaluated with ``x`` bound
        to the model variable and turned into a potential
    :input const_fx: Constraint callables, included for API compatibility
    :input K: Number of points to sample
    :input nfit: Number of gradient iterations for variational inference

    :returns: ``K`` sampled values of ``x`` drawn from the fitted
        approximation; a constraint value ``g < 0`` contributes a potential
        of ``7500 * g`` and 0 otherwise
    """
    with pm.Model() as mod:
        x = pm.Uniform('x', shape=dim)
        # NOTE(review): constraint strings go through ``eval`` with builtins
        # disabled; they should only come from trusted sources.
        namespace = {'x': x}
        for index, expression in enumerate(const_str):
            constraint_name = 'g%d' % index
            value = eval(expression, {'__builtins__': None}, namespace)
            g = pm.Deterministic(constraint_name, value)
            penalty = tt.switch(tt.lt(g, 0), 7500*g, 0)
            pm.Potential('%s_pot' % constraint_name, penalty)

        approximation = pm.fit(nfit, method='fullrank_advi', obj_n_mc=3)
        samples = approximation.sample(K)
    return samples['x']
def approximate_posterior_predictive(spec):
    """Fit an MLE network, then a Bayesian network initialised from it.

    Parameters
    ----------
    spec : mapping with keys ``"width"``, ``"sigma"``, ``"noise"`` and
        ``"algorithm"`` (``"NUTS"`` or ``"ADVI"``).

    Returns
    -------
    (trace, nn) : the posterior trace and the fitted MLE network.

    Raises
    ------
    NotImplementedError
        If ``spec["algorithm"]`` is not ``"NUTS"`` or ``"ADVI"``.
    """
    # Fail before the expensive fits. The original ended with
    # ``raise NotImplemented``, which is not an exception class and therefore
    # surfaced as a TypeError.
    algorithm = spec["algorithm"]
    if algorithm not in ("NUTS", "ADVI"):
        raise NotImplementedError(
            "Unsupported algorithm: {!r}".format(algorithm))

    # Fit a neural network with a specified number of nodes in a single
    # hidden layer
    print("Fitting an MLE model...")
    nn = SimpleNN(width=spec["width"], num_iters=5_000, step_size=0.01,
                  checkpoint=1_000, seed=0)
    nn.fit(df.x, df.y)
    print("", end="", flush=True)

    x_input = theano.shared(df[["x"]].values)
    y_output = theano.shared(df["y"].values)

    # Build a hierarchical Bayesian neural network. Initialize with MLE.
    model = build_model(
        x_input,
        y_output,
        sigma=spec["sigma"],
        noise=spec["noise"],
        width=spec["width"],
        n_weights=nn.n_weights,
        init=nn.weights,
    )

    # Sample from the posterior
    if algorithm == "NUTS":
        trace = pm.sample(**nuts_kwargs, model=model)
    else:
        mean_field = pm.fit(**advi_kwargs, model=model)
        trace = mean_field.sample(10_000)

    return trace, nn
def fit_advi(self, n=3, method='advi', n_type='restart'):
    r"""Fit the model ``n`` times with ADVI and keep every approximation.

    :param n: number of independent initialisations
    :param method: accepted for future use with other inference methods;
        only ADVI is run here.
    :param n_type: type of repeated initialisation:
        'restart' to fit the same data repeatedly,
        'cv' to fit data generated by ``self.generate_cv_data(n=n)``,
        'bootstrap' to fit the datasets in ``self.X_data_sample`` (generated
        with ``self.bootstrap_data(n=n)`` when not yet present).
    :return: nothing; the fitted approximations are stored in the
        ``self.mean_field`` dictionary under the keys 'init_1', 'init_2', ...
    """
    allowed_types = ['restart', 'cv', 'bootstrap']
    if not np.isin(n_type, allowed_types):
        raise ValueError(
            "n_type should be one of ['restart', 'cv', 'bootstrap']")

    self.mean_field = {}
    self.samples = {}
    self.node_samples = {}
    self.n_type = n_type

    if np.isin(n_type, ['bootstrap']):
        if self.X_data_sample is None:
            self.bootstrap_data(n=n)
    elif np.isin(n_type, ['cv']):
        self.generate_cv_data(n=n)  # cv data added to self.X_data_sample

    # when type is molecular cross-validation or bootstrap, the self.x_data
    # tensor is replaced with new data for each fit
    swaps_data = np.isin(n_type, ['cv', 'bootstrap'])
    run_names = ['init_' + str(i + 1) for i in np.arange(n)]

    with self.model:
        for run_index, run_name in enumerate(run_names):
            replacements = {}
            if swaps_data:
                new_data = self.X_data_sample[run_index]
                replacements = {
                    self.x_data: new_data.astype(self.data_type)
                }

            # train the model
            self.mean_field[run_name] = pm.fit(
                self.n_iter,
                method='advi',
                callbacks=[CheckParametersConvergence()],
                obj_optimizer=pm.adam(learning_rate=self.learning_rate),
                total_grad_norm_constraint=self.total_grad_norm_constraint,
                more_replacements=replacements)

            # plot training history
            if self.verbose:
                history = self.mean_field[run_name].hist[15000:]
                print(plt.plot(np.log10(history)))
def fit(self, X, y):
    """Fit a Bayesian logistic regression and store sampled coefficients.

    After fitting, ``self.coefs`` holds the sampled slope coefficients
    (transposed values of the ``x0..x{p-1}`` columns) and ``self.intercept``
    the sampled intercepts as a column vector.

    Raises
    ------
    ValueError
        If ``self.method`` is neither ``'advi'`` nor ``'nuts'``.
    """
    X = _check_X_input(X)
    y = _check_1d_inp(y)
    assert X.shape[0] == y.shape[0]

    # Validate before building the model instead of after fitting it.
    if self.method not in ('advi', 'nuts'):
        raise ValueError("'method' must be one of 'advi' or 'nuts'")

    # NOTE(review): ``niter`` and ``nsamples`` are not defined in this
    # function; presumably they should be instance attributes -- confirm
    # where they come from.
    with pm.Model():
        pm.glm.linear.GLM(X, y, family='binomial')
        pm.find_MAP()
        if self.method == 'advi':
            trace = pm.fit(progressbar=False, n=niter)
        else:
            trace = pm.sample(progressbar=False, draws=niter)

    if self.method == 'advi':
        points = list(trace.sample(nsamples))
    else:
        samples_chosen = np.random.choice(np.arange(len(trace)),
                                          size=nsamples, replace=False)
        # Index the trace by the chosen positions; the original tested the
        # sampled points themselves for membership in this set of indices.
        points = [trace[int(i)] for i in sorted(samples_chosen)]

    # The original referenced an undefined local ``coefs`` from here on.
    coefs = pd.DataFrame.from_dict(points)
    coefs = coefs[['Intercept'] + ['x' + str(i) for i in range(X.shape[1])]]
    self.intercept = coefs['Intercept'].values.reshape((-1, 1)).copy()
    del coefs['Intercept']
    self.coefs = coefs.values.T
def sample(self):
    """Draw posterior samples with the configured inference method.

    ``"NUTS"`` runs ``pm.sample`` with the instance's sampler settings;
    ``"ADVI"`` fits a full-rank approximation for 10000 iterations and draws
    1000 samples from it. Any other value raises ``NotImplementedError``.
    The elapsed time is printed before the trace is returned.
    """
    started = datetime.now()
    method = self.inference

    if method not in ("NUTS", "ADVI"):
        raise NotImplementedError

    with self.model:
        if method == "NUTS":
            # target_accept=.95 gives a smaller step size if warnings appear
            trace = pm.sample(draws=self.draws,
                              cores=self.cores,
                              chains=self.chains,
                              tune=self.tune,
                              progressbar=self.progressbar,
                              target_accept=.95)
        else:
            mean_field = pm.fit(n=10000,
                                method="fullrank_advi",
                                progressbar=self.progressbar)
            # TODO: trace is just a workaround here so the rest of the code
            # understands ADVI. We could communicate parameters from
            # mean_field directly.
            trace = mean_field.sample(1000)

    finished = datetime.now()
    seconds = (finished - started).total_seconds()
    print("Finished job {0} in {1:.0f} seconds.".format(os.getpid(), seconds))
    return trace
def model_fit_using_se(data, u_dim, method='mcmc', num_iter=10,
                       num_sample=1000):
    """Fit the two-outcome Bernoulli model with ``pm.fit`` and sample it.

    ``data`` must provide the keys ``'search'``, ``'se'`` and ``'si'``. The
    covariance of the search coefficients is read from
    ``similarity_matrix.csv``. ``method`` is accepted but not used.

    Returns ``num_sample`` draws from the approximation obtained after
    ``num_iter`` iterations.
    """
    num_obs, search_dim = data['search'].shape[0], data['search'].shape[1]

    mu_u = np.zeros(u_dim)
    cov_u = np.eye(u_dim)
    mu_nlp = np.zeros(search_dim)
    cov_nlp = np.loadtxt("similarity_matrix.csv", dtype='float32',
                         delimiter=',')

    sigmoid = tt.nnet.nnet.sigmoid

    with pm.Model() as model:
        u = pm.MvNormal('u', mu=mu_u, cov=cov_u, shape=(num_obs, u_dim))
        search = data['search']

        # Incoming edges to 'se'
        u_se = pm.MvNormal('u_se', mu=mu_u, cov=cov_u, shape=u_dim)
        search_se = pm.MvNormal('search_se', mu=mu_nlp, cov=cov_nlp,
                                shape=search_dim)
        # 'se' as a function of its parents
        se_mean = sigmoid(tt.dot(search, search_se) + tt.dot(u, u_se))
        se = pm.Bernoulli('se', p=se_mean, observed=data['se'])

        # Incoming edges to 'si'
        u_si = pm.MvNormal('u_si', mu=mu_u, cov=cov_u, shape=u_dim)
        search_si = pm.MvNormal('search_si', mu=mu_nlp, cov=cov_nlp,
                                shape=search_dim)
        se_si = pm.HalfNormal('se_si', sigma=1)
        si_logit = (tt.dot(search, search_si) + tt.dot(u, u_si)
                    + se_si * se)
        pm.Bernoulli('si', p=sigmoid(si_logit), observed=data['si'])

        approximation = pm.fit(n=num_iter)
        draws = approximation.sample(num_sample)
    return draws
def fast_sample(self, sample_size=5000, iters=10000):
    """Approximate the posterior with ADVI and store draws in ``self.trace``.

    Calls ``self.fit()`` first when no model has been built yet.
    """
    if self.model is None:
        self.fit()

    with self.model:
        approximation = pm.fit(n=iters, method=pm.ADVI())  # until converge
        self.trace = approximation.sample(draws=sample_size)
def fit_ADVI(self, n_samples=2000, n_iter=100000, inference='advi',
             **fit_kws):
    """Run ``pm.fit`` on ``self.model`` and sample from the result.

    The approximation is stored in ``self.approx_fit`` and ``n_samples``
    draws from it in ``self.trace_``. Extra keyword arguments are forwarded
    to ``pm.fit``.
    """
    fit_options = dict(n=n_iter, method=inference)
    with self.model:
        self.approx_fit = pm.fit(**fit_options, **fit_kws)
        self.trace_ = self.approx_fit.sample(draws=n_samples)
def fit(self, sample_size, traceplot_name=None, fast_sampling=False):
    '''
    Build the kernel regression model and draw posterior samples.

    sample_size (int): The size of the sample (also used as the number of
        ADVI iterations when fast_sampling is on)
    traceplot_name (str): The name of the traceplot file; no plot is saved
        when omitted
    fast_sampling (bool): whether or not variational approximation should
        be used.

    Note: to evaluate the kernel function, pymc3 only accepts tensor types
    from theano, so ``self.X_train`` is replaced by a shared variable.
    '''
    self.model = pm.Model()
    self.X_train = shared(self.X_train)  # need tensor type

    with self.model:
        packed_L = pm.LKJCholeskyCov('packed_L', n=3, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(3, packed_L)
        # One log-density per center, all sharing the same Cholesky factor.
        evaluated_kernels = [
            pm.MvNormal.dist(mu=center, chol=L).logp(self.X_train)
            for center in self.centers.values
        ]

        beta = pm.Normal('beta', mu=0, sd=3, shape=self.number_of_centers)
        latent = pm.Deterministic('mu', tt.dot(beta, evaluated_kernels))

        error = pm.HalfCauchy('error', 12)
        pm.Normal("y", mu=latent, sd=error, observed=np.log(self.y_train))

    with self.model:
        if fast_sampling:
            approx = pm.fit(n=sample_size, method=pm.ADVI())  # until converge
            self.trace = approx.sample(draws=sample_size)
        else:
            map_point = pm.find_MAP()
            self.trace = pm.sample(sample_size, start=map_point)

    if not traceplot_name:
        return

    fig, axs = plt.subplots(3, 2)
    pm.traceplot(self.trace, varnames=['packed_L', 'beta', 'error'], ax=axs)
    fig.savefig(traceplot_name)
    fig_path = os.path.join(os.getcwd(), traceplot_name)
    print(f'the traceplot has been saved to {fig_path}')
def _fit(self, X, Y, sampler='vi', **kwargs):
    """Construct the model for ``X``/``Y`` and run the requested sampler.

    ``sampler='vi'`` runs ``pm.sample`` with ``kwargs['sample_params']``,
    uses the last point as start for ``pm.fit`` with ``kwargs['vi_params']``
    and stores ``kwargs['draws']`` draws of the approximation in
    ``self.trace``; on any exception it logs the error and falls back to a
    short ``pm.sample`` run. ``'metropolis'`` samples with Metropolis from the
    MAP point; any other value samples with NUTS.
    """
    self.construct_model(X, Y)
    fit_callbacks = kwargs['vi_params'].get('callbacks', [])
    kwargs['random_seed'] = self.random_state.randint(2**32, dtype='uint32')

    # Rebuild every convergence callback with an absolute difference test.
    convergence_cls = pm.callbacks.CheckParametersConvergence
    for position, callback in enumerate(fit_callbacks):
        if not isinstance(callback, convergence_cls):
            continue
        params = callback.__dict__
        for attribute in ('_diff', 'prev', 'ord'):
            params.pop(attribute)
        params['diff'] = 'absolute'
        fit_callbacks[position] = convergence_cls(**params)

    if sampler == 'vi':
        random_seed = kwargs['random_seed']
        with self.model:
            sample_params = kwargs['sample_params']
            vi_params = kwargs['vi_params']
            vi_params['random_seed'] = random_seed
            sample_params['random_seed'] = random_seed
            draws_ = kwargs['draws']
            try:
                self.trace = pm.sample(**sample_params)
                vi_params['start'] = self.trace[-1]
                self.trace_vi = pm.fit(**vi_params)
                self.trace = self.trace_vi.sample(draws=draws_)
            except Exception as e:
                self.logger.error(getattr(e, 'message', e))
                self.trace_vi = None
                self.trace = None

        if self.trace_vi is None and self.trace is None:
            with self.model:
                self.logger.info(
                    "Error in vi ADVI sampler using nuts sampler with draws {}"
                    .format(draws_))
                nuts_params = copy.deepcopy(sample_params)
                nuts_params['tune'] = nuts_params['draws'] = 50
                self.logger.info("Params {}".format(nuts_params))
                self.trace = pm.sample(**nuts_params)
        return

    with self.model:
        if sampler == 'metropolis':
            map_point = pm.find_MAP()
            self.trace = pm.sample(**kwargs, step=pm.Metropolis(),
                                   start=map_point)
        else:
            self.trace = pm.sample(**kwargs, step=pm.NUTS())
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    """Fit ``self.model`` with the requested sampler and store the trace.

    Parameters
    ----------
    sampler : 'variational', 'metropolis', or anything else for NUTS.
    draws, tune : number of draws / tuning steps for the MCMC samplers;
        ``draws`` is also the number of samples taken from the VI fit.
    vi_params : dict of keyword arguments for ``pm.fit``; its convergence
        callbacks are rebuilt with ``diff='absolute'`` and its ``'start'``
        entry is overwritten in the variational branch.
    **kwargs : forwarded to ``pm.sample`` for the MCMC samplers.

    The trace is stored in ``self.trace_`` and, for the variational sampler,
    the approximation in ``self.trace_vi_``.
    """
    callbacks = vi_params.get("callbacks", [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop("_diff")
            params.pop("prev")
            params.pop("ord")
            params["diff"] = "absolute"
            callbacks[i] = CheckParametersConvergence(**params)

    if sampler == "variational":
        with self.model:
            try:
                self.trace_ = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params["start"] = self.trace_[-1]
                self.trace_vi_ = pm.fit(**vi_params)
                self.trace_ = self.trace_vi_.sample(draws=draws)
            except Exception as e:
                logger.error(getattr(e, "message", e))
                # Reset both so the fallback below always runs after a
                # failure; previously ``trace_`` could be unset or still hold
                # the 5-draw warm-up trace.
                self.trace_vi_ = None
                self.trace_ = None
        if self.trace_vi_ is None and self.trace_ is None:
            with self.model:
                logger.info(
                    "Error in vi ADVI sampler using NUTS sampler with draws {}"
                    .format(draws))
                self.trace_ = pm.sample(chains=1, cores=4, tune=20, draws=20,
                                        step=pm.NUTS())
                # Kept for callers that read the attribute the fallback
                # used to write.
                self.trace = self.trace_
    elif sampler == "metropolis":
        with self.model:
            start = pm.find_MAP()
            self.trace_ = pm.sample(
                chains=2,
                cores=8,
                tune=tune,
                draws=draws,
                **kwargs,
                step=pm.Metropolis(),
                start=start,
            )
    else:
        with self.model:
            self.trace_ = pm.sample(chains=2, cores=8, tune=tune,
                                    draws=draws, **kwargs, step=pm.NUTS())
def fit(self, instances: np.ndarray,
        labels: np.ndarray) -> Optional[List[str]]:
    """Construct the network for the data and fit it with ADVI.

    Stores the model in ``self.model``, the approximation in ``self.approx``
    and the result of ``self._sample_probability(instances)`` in
    ``self.sample_proba``. Always returns ``None``.
    """
    network = self._construct_nn(instances, labels)
    self.model = network

    with network:
        self.approx = pm.fit(n=EPOCHS, method=pm.ADVI())
        self.sample_proba = self._sample_probability(instances)

    return None
def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    """Fit the 'repeated' and 'fairness' models with ADVI and compare them.

    Both models put a Binomial(1, p) likelihood on ``actions``; they differ
    in how ``p`` is built from ``splits`` and ``stakes``. ``temp`` is accepted
    but not used; ``sd`` is the prior scale of ``f`` in the fairness model.

    Returns ``(trace_fairness, trace_repeated, comp)`` where each trace holds
    2000 draws and ``comp`` is the ``pm.compare`` result (LOO,
    BB-pseudo-BMA).
    """
    n_draws = 2000

    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        rejection_weight = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds_a = np.exp(2 * r * splits + c * stakes**st)
        odds_r = np.exp(rejection_weight * (splits < 0.5 - t / 2))
        accept_prob = odds_a / (odds_r + odds_a)
        pm.Binomial('a', 1, accept_prob, observed=actions)
        trace_repeated = pm.fit(method='advi').sample(n_draws)

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds = np.exp(c * stakes**st + splits * r
                      - f * (splits < 0.5 - t / 2))
        accept_prob = odds / (1 + odds)
        pm.Binomial('a', 1, accept_prob, observed=actions)
        trace_fairness = pm.fit(method='advi').sample(n_draws)

    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'
    model_dict = {
        fairness_model: trace_fairness,
        repeated_model: trace_repeated,
    }
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp
def test_var_replacement():
    """Check that ``sample_node`` honours ``more_replacements`` after a fit.

    A length-10 model is fitted; sampling the mean node must give shape
    ``(10,)``, and swapping the input variable for a length-11 array must
    give shape ``(11,)``.
    """
    grid = pm.floatX(np.linspace(0, 10, 10))
    observed = pm.floatX(np.random.normal(grid*4, .05))

    with pm.Model():
        x_rv = pm.Normal('X', grid, shape=grid.shape)
        slope = pm.Normal('b', 4.)
        mu = x_rv * slope
        pm.Normal('y', mu, .1, observed=observed)
        approx = pm.fit(100)

        # Without replacements the node keeps the shape of the fitted input.
        default_draw = approx.sample_node(mu).eval()
        assert default_draw.shape == (10, )

        # Replacing the input variable changes the shape of the sampled node.
        longer_grid = pm.floatX(np.linspace(0, 10, 11))
        swapped_draw = approx.sample_node(
            mu, more_replacements={x_rv: longer_grid}).eval()
        assert swapped_draw.shape == (11, )
def init_nuts(init='auto', njobs=1, n_init=500000, model=None,
              random_seed=-1, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
        The model is passed explicitly to every PyMC3 call made here, so
        no enclosing `with` block is needed when it is given.
    random_seed : int or sequence of int
        Only the first element is used; it seeds the ADVI fits and the
        'nuts' initialization run.
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler (a list of points when `njobs` != 1)
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object

    Raises
    ------
    ValueError
        If `vars` in `kwargs` is not the full set of model variables, or
        the model has non-continuous variables.
    TypeError
        If `init` is not a string.
    NotImplementedError
        If `init` names an unknown initializer.
    """
    model = pm.modelcontext(model)

    free_vars = kwargs.get('vars', model.vars)
    if set(free_vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(free_vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')
    init = init.lower()
    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    cb = [
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='relative'),
    ]

    def _fit_mean_field():
        # Mean-field ADVI fit shared by the three 'advi*' initializers.
        return pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField

    def _starts_and_cov(approx):
        # One starting point per job plus the diagonal variance of the
        # approximation, flattened to the model's array layout.
        starts = list(approx.sample(draws=njobs))
        stds = approx.bij.rmap(approx.std.eval())
        return starts, model.dict_to_array(stds) ** 2

    def _approx_mean(approx):
        # Mean of the approximation flattened to the model's array layout.
        return model.dict_to_array(approx.bij.rmap(approx.mean.get_value()))

    if init == 'adapt_diag':
        start = [model.test_point] * njobs
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(njobs):
            point = {name: val.copy()
                     for name, val in model.test_point.items()}
            for val in point.values():
                # In-place uniform jitter in [-1, 1).
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(point)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = _fit_mean_field()
        start, cov = _starts_and_cov(approx)
        mean = _approx_mean(approx)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = _fit_mean_field()
        start, cov = _starts_and_cov(approx)
        mean = _approx_mean(approx)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = _fit_mean_field()
        start, cov = _starts_and_cov(approx)
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        map_point = pm.find_MAP(model=model)
        approx = pm.MeanField(model=model, start=map_point)
        pm.fit(
            random_seed=random_seed,
            n=n_init, method=pm.KLqp(approx), model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )
        start, cov = _starts_and_cov(approx)
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        map_point = pm.find_MAP(model=model)
        cov = pm.find_hessian(point=map_point, model=model)
        start = [map_point] * njobs
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(model=model),
                               tune=n_init // 2,
                               random_seed=random_seed, model=model)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, njobs))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    # A single job gets a bare point instead of a one-element list.
    if njobs == 1:
        start = start[0]

    step = pm.NUTS(potential=potential, model=model, **kwargs)
    return start, step
output = pm.Poisson('spikes', mu = p, observed = spikes) # Sample from the model - using 2 chains in parallel (minimum to compare traceplots and rhat values) # Eventually variational inference with advi seems a better prospect - NUTS is too slow/finicky to sample # The logic of using ADVI here follows from: https://pymc-devs.github.io/pymc3/notebooks/bayesian_neural_network_opvi-advi.html # And also from: https://pymc-devs.github.io/pymc3/notebooks/variational_api_quickstart.html # Here we aren't scaling the variance of the gradient as it doesn't seem to give much improvement in simple models (look at the first link above) # We also use callbacks similar to those used in the 'init' portion of pm.sample - this stops ADVI once it has converged/ELBO doesn't change beyond a threshold cb = [pm.callbacks.CheckParametersConvergence(diff='absolute', tolerance = 1e-4), pm.callbacks.CheckParametersConvergence(diff='relative', tolerance = 1e-4),] with model: #trace = pm.sample(num_samples + 1000, tune = 1000)[1000:] #v_params = pm.variational.advi(n = 200000) #trace = pm.variational.sample_vp(v_params, draws=num_samples) inference = pm.fit(n=200000, method = 'fullrank_advi', callbacks = cb) trace = inference.sample(num_samples) # Print the Gelman-Rubin statistics for this model to file #print('\n', file = f) #print("======================== Unit {} ============================", file = f) #print(pm.diagnostics.gelman_rubin(trace), file = f) #print("=============================================================", file = f) # Run through the laser conditions and tastes again, and save the model results in results # The strategy now is to run through the MCMC samples, and calculate the difference in the Poisson mean between the laser on and off conditions for the different tastes for laser_status in range(lasers.shape[0] - 1): for stimulus in range(len(trains_dig_in)): # First calculate the mean firing rate for the laser off (control) condition for this taste 
bayesian_results[laser_status, stimulus, :, 0] = np.exp(trace['b_t'][:, stimulus] + trace['b_l'][:, 2*laser_status + 1] + trace['b_t_l'][:, stimulus, 2*laser_status + 1])