def _fit_time_series_model(self, signal, target, samples):
    model_randomwalk = pm.Model()
    with model_randomwalk:
        sigma_alpha = pm.Exponential('sigma_alpha', 1. / .02, testval=.1)
        sigma_beta = pm.Exponential('sigma_beta', 1. / .02, testval=.1)

        alpha = GaussianRandomWalk('alpha', sigma_alpha ** -2, shape=len(target))
        beta = GaussianRandomWalk('beta', sigma_beta ** -2, shape=len(target))

        # Define regression
        regression = alpha + beta * signal.values

        # Assume prices are normally distributed; the mean comes from the regression.
        sd = pm.Uniform('sd', 0, 20)
        likelihood = pm.Normal('y', mu=regression, sd=sd, observed=target.values)

        # First optimize the random walk
        start = pm.find_MAP(vars=[alpha, beta], fmin=optimize.fmin_l_bfgs_b)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(10, step, start)

        # Sample, continuing from the last position of the short run
        start2 = trace.point(-1)
        step = pm.NUTS(scaling=start2)
        trace_rw = pm.sample(samples, step, start=start2)
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of initializer.
        If 'advi', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])
        start = {varname: np.mean(init_trace[varname])
                 for varname in init_trace.varnames}
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
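# A minimal usage sketch for init_nuts above. The toy model, variable names, and
# iteration counts are illustrative assumptions, not taken from any real codebase;
# it relies on the same legacy pm.variational.advi API that init_nuts itself uses.
import numpy as np
import pymc3 as pm

observed = np.random.randn(100)
with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=observed)
    # Estimate a diagonal scaling matrix with ADVI, then hand the initialized
    # NUTS step and starting point to sample().
    start, nuts_step = init_nuts(init='advi', n_init=10000)
    trace = pm.sample(1000, step=nuts_step, start=start)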
def model_returns_t(data, samples=500):
    """Run Bayesian model assuming returns are Student-T distributed.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    with pm.Model():
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        returns = pm.T('returns', nu=nu + 2, mu=mu, sd=sigma, observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic('sharpe',
                         returns.distribution.mean /
                         returns.distribution.variance**.5 * np.sqrt(252))

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)
    return trace
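# Hypothetical call to model_returns_t above, using a synthetic daily-returns
# series; the numbers are made up purely for illustration.
import numpy as np
import pandas as pd

returns = pd.Series(np.random.normal(loc=0.0005, scale=0.01, size=252))
trace = model_returns_t(returns, samples=500)
print(trace['annual volatility'].mean(), trace['sharpe'].mean())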
def fit(self, x, y, mcmc_samples=1000): t = x.shape[0] - 1 # number of additive components varnames = ["xc", "w", "decay", "sigma", "b", "lam"] with pm.Model() as model: # Priors for additive predictor w = pm.Normal("w", mu=0, sd=1, shape=t) decay = pm.HalfNormal("decay", sd=200, shape=t) # Prior for likelihood sigma = pm.Uniform("sigma", 0, 0.3) b = pm.Normal("b", mu=0, sd=20) lam = pm.Uniform("lam", 0, 0.3) # Building linear predictor lin_pred = 0 for ii in range(1, t + 1): lin_pred += self.bias(w[ii - 1], decay[ii - 1])(x[ii, :]) phi2 = pm.Deterministic("phi2", 0.5 * lam + (1 - lam) * phi(b + lin_pred + x[0, :] / sigma)) y = pm.Bernoulli("y", p=phi2, observed=y) with model: # Inference start = pm.find_MAP() # Find starting value by optimization print("MAP found:") # step = pm.NUTS(scaling = start) # step = pm.Slice() step = pm.NUTS(scaling=start) trace = pm.sample(mcmc_samples, step, start=start, progressbar=True) # draw posterior samples return trace, model
def fit(self, xdata, ydata, yerr, arange=[-100., 100.], brange=[-100., 100.]):
    trace = None
    with pm.Model() as model:
        # alpha = pm.Normal('alpha', mu=1.0e7, sd=1.0e6)
        # beta = pm.Normal('beta', mu=1.0e7, sd=1.0e6)
        # sigma = pm.Uniform('sigma', lower=0, upper=20)

        alpha = pm.Uniform('alpha', lower=arange[0], upper=arange[1])
        beta = pm.Uniform('beta', lower=brange[0], upper=brange[1])
        sigma = yerr

        y_est = alpha + beta * xdata

        likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=ydata)

        # obtain starting values via MAP
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(2000, step, start=start, progressbar=False)

        # pm.traceplot(trace)
        # plt.show()
        # pprint(trace['alpha'].mean())
        # pprint(trace['alpha'].std())
        # print pm.summary(trace)
        # print pm.summary(trace, ['alpha'])
        # print pm.stats()
        # print(trace.__dict__)

    # Return the traces
    return [trace['alpha'], trace['beta']]
def sample_pymc3(d, samples=2000, njobs=2):
    with pm.Model() as model:
        dfc = pm.Normal(mu=0.0, sd=d['sigma_fc'], name='dfc')
        Q = pm.Gamma(mu=d['mu_Q'], sd=d['sigma_Q'], name='Q')
        Pdet = pm.Gamma(mu=d['mu_Pdet'], sd=d['sigma_Pdet'], name='Pdet')
        kc = pm.Gamma(mu=d['mu_kc'], sd=d['sigma_kc'], name='kc')

        M = d['M']
        T = d['T']
        scale = d['scale']
        mu_fc = d['mu_fc']
        f = d['f']

        like = pm.Gamma(
            alpha=M,
            beta=(M / (((2 * 1.381e-5 * T) / (np.pi * Q * kc)) / scale *
                       (dfc + mu_fc)**3 /
                       ((f * f - (dfc + mu_fc)**2) * (f * f - (dfc + mu_fc)**2) +
                        f * f * (dfc + mu_fc)**2 / Q**2) + Pdet)),
            observed=d['y'],
            name='like')

        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step=step, start=start,
                          progressbar=True, njobs=njobs)
    return trace
def run(self, samples=1000, find_map=True, verbose=True, step='nuts', burn=0.5, **kwargs): ''' Run the model. Args: samples (int): Number of MCMC samples to generate find_map (bool): passed to find_map argument of pm.sample() verbose (bool): if True, prints additional information step (str or PyMC3 Sampler): either an instantiated PyMC3 sampler, or the name of the sampler to use (either 'nuts' or 'metropolis'). start: Optional starting point to pass onto sampler. burn (int or float): Number or proportion of samples to treat as burn-in; passed onto the BayesianModelResults instance returned by this method. kwargs (dict): optional keyword arguments passed on to the sampler. Returns: an instance of class BayesianModelResults. ''' with self.model: njobs = kwargs.pop('njobs', 1) start = kwargs.pop('start', pm.find_MAP() if find_map else None) chain = kwargs.pop('chain', 0) if isinstance(step, string_types): step = { 'nuts': pm.NUTS, 'metropolis': pm.Metropolis }[step.lower()](**kwargs) self.start = start trace = pm.sample( samples, start=start, step=step, progressbar=verbose, njobs=njobs, chain=chain) self.last_trace = trace # for convenience return BayesianModelResults(trace)
def test_linear_component(self): vars_to_create = { 'sigma', 'sigma_interval__', 'y_obs', 'lm_x0', 'lm_Intercept' } with Model() as model: lm = LinearComponent( self.data_linear['x'], self.data_linear['y'], name='lm' ) # yields lm_x0, lm_Intercept sigma = Uniform('sigma', 0, 20) # yields sigma_interval__ Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear) # yields y_obs start = find_MAP(vars=[sigma]) step = Slice(model.vars) trace = sample(500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed) assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0 assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0 assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 assert vars_to_create == set(model.named_vars.keys())
def lin_fit(t, y, yerr=None, samples=10000, sampler="NUTS", alphalims=[-100,100]): """ Bayesian linear fitting function. See Jake Vanderplas' blog post on how to be a bayesian in python for more details uses pymc3 MCMC sampling inputs: t :: Vector of values at which the function is evaluated ("x" values) y :: Vector of dependent values (observed y(t)) yerr (optional = None) :: Errors on y values. If not provided, errors are taken to be the same for each dta point, with a 1/sigma (jefferys) prior. samples (optional = 1000) :: Number of samples to draw from MCMC sampler (optional = "NUTS") :: Type of MCMC sampler to use. "NUTS" or "Metropolis" alphalims (optional = [-100,100]) :: Length 2 vector of endpoints for uniform prior on intercept of the line """ with pm.Model() as model: #Use uninformative priors on slope/intercept of line alpha = pm.Uniform('alpha',alphalims[0],alphalims[1]) #this defines an uninformative prior on slope. See Jake's blog post beta = pm.DensityDist('beta',lambda value: -1.5 * T.log(1 + value**2.),testval=0) #if yerr not given, assume all values have same errorbar if yerr is None: sigma = pm.DensityDist('sigma', lambda value: -T.log(T.abs_(value)),testval=1) else: sigma = yerr like = pm.Normal('likelihood',mu=alpha+beta*t, sd=sigma, observed=y) #start the sampler at the maximum a-posteriori value start = pm.find_MAP() step = select_sampler(sampler,start) trace = pm.sample(draws=samples,start=start,step=step) return trace
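# Sketch of calling lin_fit above on synthetic data. The true slope/intercept and
# the error bars are arbitrary; select_sampler is assumed to be defined alongside
# lin_fit in the same module.
import numpy as np

t = np.linspace(0, 10, 50)
y = 2.0 + 0.5 * t + np.random.normal(0, 1.0, t.size)
trace = lin_fit(t, y, yerr=np.ones_like(y), samples=2000, sampler="NUTS")
print(trace['alpha'].mean(), trace['beta'].mean())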
def run(n=5000):
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        trace = pm.sample(n, xstep, xstart, random_seed=123, progressbar=True)

        pm.summary(trace)
def model_returns_t_alpha_beta(data, bmark, samples=2000): """Run Bayesian alpha-beta-model with T distributed returns. This model estimates intercept (alpha) and slope (beta) of two return sets. Usually, these will be algorithm returns and benchmark returns (e.g. S&P500). The data is assumed to be T distributed and thus is robust to outliers and takes tail events into account. Parameters ---------- returns : pandas.Series Series of simple returns of an algorithm or stock. bmark : pandas.Series Series of simple returns of a benchmark like the S&P500. If bmark has more recent returns than returns_train, these dates will be treated as missing values and predictions will be generated for them taking market correlations into account. samples : int (optional) Number of posterior samples to draw. Returns ------- pymc3.sampling.BaseTrace object A PyMC3 trace object that contains samples for each parameter of the posterior. """ if len(data) != len(bmark): # pad missing data data = pd.Series(data, index=bmark.index) data_no_missing = data.dropna() with pm.Model(): sigma = pm.HalfCauchy( 'sigma', beta=1, testval=data_no_missing.values.std()) nu = pm.Exponential('nu_minus_two', 1. / 10., testval=.3) # alpha and beta beta_init, alpha_init = sp.stats.linregress( bmark.loc[data_no_missing.index], data_no_missing)[:2] alpha_reg = pm.Normal('alpha', mu=0, sd=.1, testval=alpha_init) beta_reg = pm.Normal('beta', mu=0, sd=1, testval=beta_init) pm.T('returns', nu=nu + 2, mu=alpha_reg + beta_reg * bmark, sd=sigma, observed=data) start = pm.find_MAP(fmin=sp.optimize.fmin_powell) step = pm.NUTS(scaling=start) trace = pm.sample(samples, step, start=start) return trace
def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n, step=step, start=start)
    return trace
def _inference(self, reinit=True):
    with self.cached_model:
        if reinit or (self.cached_start is None) or (self.cached_sampler is None):
            self.cached_start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
            self.cached_sampler = pm.NUTS(scaling=self.cached_start)

        trace = pm.sample(self.samples, self.cached_sampler,
                          start=self.cached_start)

    return trace
def fit(self, X, y, sampling_iterations): X = self._force_shape(X) self.input_data_dimension = len(X[0]) model, w, b = self._build_model(X, y) with model: self.map_estimate = pymc3.find_MAP(model=model, vars=[w, b]) step = pymc3.NUTS(scaling=self.map_estimate) trace = pymc3.sample(sampling_iterations, step, start=self.map_estimate) self.samples = trace
def learn_model(model, draws=50000): with model: start = pm.find_MAP() #step = pm.Slice() # It is very slow when the model has many parameters #step = pm.HamiltonianMC(scaling=start) # It leads to constant samples #step = pm.NUTS(scaling=start) # It leads to constant samples step = pm.Metropolis() trace = pm.sample(draws, step, start=start) return trace
def test_run(self):
    model = self.build_model()
    with model:
        # move the chain to the MAP which should be a good starting point
        start = pm.find_MAP()
        H = model.fastd2logp()  # find a good orientation using the hessian at the MAP
        h = H(start)

        step = pm.HamiltonianMC(model.vars, h)
        pm.sample(50, step, start)
def test_errors():
    _, model, _ = exponential_beta(2)
    with model:
        try:
            newstart = find_MAP(Point(x=[-0.5, 0.01], y=[0.5, 4.4]))
        except ValueError as e:
            msg = str(e)
            assert "x.logp" in msg, msg
            assert "x.value" not in msg, msg
        else:
            assert False, newstart
def run(n=2000): if n == "short": n = 50 import matplotlib.pyplot as plt with model: start = find_MAP(fmin=opt.fmin_powell) trace = sample(n, Slice(), start=start) plt.plot(x, y, 'x') glm.plot_posterior_predictive(trace)
def test_glm_from_formula(self): with Model() as model: NAME = 'glm' GLM.from_formula('y ~ x', self.data_linear, name=NAME) start = find_MAP() step = Slice(model.vars) trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed) assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0 assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0 assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0
def test_linear_component(self): with Model() as model: y_est, _ = glm.linear_component('y ~ x', self.data_linear) sigma = Uniform('sigma', 0, 20) Normal('y_obs', mu=y_est, sd=sigma, observed=self.y_linear) start = find_MAP(vars=[sigma]) step = Slice(model.vars) trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) self.assertAlmostEqual(np.mean(trace['Intercept']), self.intercept, 1) self.assertAlmostEqual(np.mean(trace['x']), self.slope, 1) self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1)
def _find_map(self): """Find mode of posterior using Powell optimization.""" tstart = time.time() with self.model: logging.info('finding PMF MAP using Powell optimization...') self._map = pm.find_MAP(fmin=sp.optimize.fmin_powell, disp=True) elapsed = int(time.time() - tstart) logging.info('found PMF MAP in %d seconds' % elapsed) # This is going to take a good deal of time to find, so let's save it. save_np_vars(self._map, self.map_dir)
def test_linear_component_from_formula(self): with Model() as model: lm = LinearComponent.from_formula('y ~ x', self.data_linear) sigma = Uniform('sigma', 0, 20) Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) start = find_MAP(vars=[sigma]) step = Slice(model.vars) trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed) assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
def BayesianLearning(fig, path, measurements, pos_min=-50, pos_max=50, subplot=133): with pm.Model() as model: # Compute bounds based on measurements pos_min_x, pos_max_x, pos_min_y, pos_max_y = boundsFromPath(path) minPos = min(pos_min_x, pos_min_y) maxPos = max(pos_max_x, pos_max_y) # Priors # See: http://stackoverflow.com/q/25342899 thermal_position_x = pm.Uniform('thermal_position_x', lower=pos_min_x, upper=pos_max_x) thermal_position_y = pm.Uniform('thermal_position_y', lower=pos_min_y, upper=pos_max_y) thermal_amplitude = pm.Uniform('thermal_amplitude', lower=-10, upper=10) thermal_sd = pm.Uniform('sd', lower=0.1, upper=100) # When sampling, look at the values of the test thermal field at the points # we have taken measurements at. velocity = deterministicVelocity(path, measurements, thermal_position_x, thermal_position_y, thermal_amplitude, thermal_sd) # Observe the vertical velocities thermal_vert_vel = pm.Normal('thermal_vert_vel', mu=velocity, observed=measurements) # Sample this to find the posterior, note Metropolis works with discrete step = pm.Metropolis() start = pm.find_MAP(fmin=sp.optimize.fmin_powell) trace = pm.sample(2000, step=step, progressbar=True, start=start) # Find the most probable surface and plot that for comparison x = lameMAP(trace['thermal_position_x']) y = lameMAP(trace['thermal_position_y']) amp = lameMAP(trace['thermal_amplitude']) sd = lameMAP(trace['sd']) eq = thermalEq((x,y), amp, sd) # Plot it prev = plt.gca() visualizeThermalField([eq], path, measurements, trace, pos_min, pos_max, only2d=False, fig=fig, subplot=subplot, lines=False, limits=[prev.get_xlim(),prev.get_ylim(),prev.get_zlim()]) # Really, we have more information than just this MAP estimate. # We have probability distributions over all the parameters. # It's hard to visualize this in one figure that we can directly # compare with the GPR though. pm.traceplot(trace, ['thermal_position_x','thermal_position_y', 'thermal_amplitude','sd'])
def too_slow(self): model = self.build_model() start = {'groupmean': self.obs_means.mean(), 'groupsd_interval_': 0, 'sd_interval_': 0, 'means': self.obs_means, 'floor_m': 0., } with model: start = pm.find_MAP(start=start, vars=[model['groupmean'], model['sd_interval_'], model['floor_m']]) step = pm.NUTS(model.vars, scaling=start) pm.sample(50, step, start)
def _get_norm_params(self):
    trace = []
    for i, s in zip([0, 1], ['+', '-']):
        print("Estimating Gaussian parameters in %s strand" % s)
        with pm.Model() as model:
            model.verbose = 0
            mu = pm.Uniform('mu')
            sigma = pm.Uniform('sigma')
            tau = 1 / sigma**2
            y_pred = pm.Normal('y_pred', mu=mu, tau=tau)
            y_est = pm.Normal('y_est', mu=mu, tau=tau,
                              observed=self.cleanFcArray[i])
            start = pm.find_MAP()
            step = pm.Metropolis()
            trace.append(pm.sample(self.mcmcSteps, step, start=start,
                                   progressbar=self.showProgress))
        print()
    return trace
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the width of the
    T-distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.StudentT('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)

        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1], progressbar=False)

    return model, trace
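# Illustrative call to model_stoch_vol above on a synthetic return series; the
# series length and volatility level are arbitrary.
import numpy as np
import pandas as pd

returns = pd.Series(np.random.normal(0, 0.02, 400))
model, trace = model_stoch_vol(returns, samples=500)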
def too_slow(self): model = self.build_model() with model: start = pm.Point({ 'groupmean': self.obs_means.mean(), 'groupsd_interval_': 0, 'sd_interval_': 0, 'means': np.array(self.obs_means), 'u_m': np.array([.72]), 'floor_m': 0., }) start = pm.find_MAP(start, model.vars[:-1]) H = model.fastd2logp() h = np.diag(H(start)) step = pm.HamiltonianMC(model.vars, h) pm.sample(50, step, start)
def test_bernoulli(): data = [random.randint(0,1) for i in range(200)] model = pymc3.Model() with model: p = pymc3.Uniform(lower=0,upper=1, name='p') X = pymc3.Bernoulli(p=p, name='X', observed=data) start = pymc3.find_MAP() # instantiate sampler step = pymc3.NUTS(scaling=start) # draw 500 posterior samples trace = pymc3.sample(10000, step, start=start) pymc3.traceplot(trace) plt.show()
def main():
    X, Y = generate_sample()

    with pm.Model() as model:
        alpha = pm.Normal('alpha', mu=0, sd=20)
        beta = pm.Normal('beta', mu=0, sd=20)
        sigma = pm.Uniform('sigma', lower=0)
        y = pm.Normal('y', mu=beta * X + alpha, sd=sigma, observed=Y)
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)

    with model:
        if multicore:
            trace = pm.sample(itenum, step, start=start,
                              njobs=chainnum, random_seed=range(chainnum),
                              progressbar=progress)
        else:
            ts = [pm.sample(itenum, step, chain=i, progressbar=progress)
                  for i in range(chainnum)]
            trace = merge_traces(ts)

        if saveimage:
            pm.traceplot(trace).savefig("simple_linear_trace.png")
        print("Rhat = {0}".format(pm.gelman_rubin(trace)))

    t1 = time.clock()
    print("elapsed time = {0}".format(t1 - t0))

    # trace
    if not multicore:
        trace = ts[0]
    with model:
        pm.traceplot(trace, model.vars)
        pm.forestplot(trace)

    with open("simplelinearregression_model.pkl", "wb") as fpw:
        pkl.dump(model, fpw)
    with open("simplelinearregression_trace.pkl", "wb") as fpw:
        pkl.dump(trace, fpw)
    with open("simplelinearregression_model.pkl", "rb") as fp:
        model = pkl.load(fp)
    with open("simplelinearregression_trace.pkl", "rb") as fp:
        trace = pkl.load(fp)
def __init__(self,X_train,y_train,n_hidden,lam=1): n_train = y_train.shape[0] n_dim = X_train.shape[1] print X_train.shape with pm.Model() as rbfnn: C = pm.Normal('C',mu=0,sd=10,shape=(n_hidden)) #beta = pm.Gamma('beta',1,1) w = pm.Normal('w',mu=0,sd=10,shape=(n_hidden+1)) #component, updates = theano.scan(fn=lambda x: T.sum(C-x)**2,sequences=[X_train]) y_out=[] for x in X_train: #rbf_out = T.exp(-lam*T.sum((C-x)**2,axis=1)) #1d speed up rbf_out = T.exp(-lam*(C-x)**2) #rbf_out = theano.printing.Print(rbf_out) rbf_out_biased = \ T.concatenate([ rbf_out, T.alloc(1,1) ], 0) y_out.append(T.dot(w,rbf_out_biased)) y = pm.Normal('y',mu=y_out,sd=0.01,observed=y_train) start = pm.find_MAP(fmin=scipy.optimize.fmin_l_bfgs_b) print start step = pm.NUTS(scaling=start) trace = pm.sample(2000, step, progressbar=False) step = pm.NUTS(scaling=trace[-1]) trace = pm.sample(20000,step,start=trace[-1]) print summary(trace, vars=['C', 'w']) vars = trace.varnames for i, v in enumerate(vars): for d in trace.get_values(v, combine=False, squeeze=False): d=np.squeeze(d) with open(str(v)+".txt","w+") as thefile: for item in d: print>>thefile, item traceplot(trace) plt.show()
data2 = T131A[:, 5] cov2 = np.diag(T131A[:, 6]**2) llk2 = pm.MvNormal('llk2', mu=synthetic2(x0, y0, z0, dV), cov=cov2, observed=data2) data3 = T130A[:, 5] cov3 = np.diag(T130A[:, 6]**2) llk3 = pm.MvNormal('llk3', mu=synthetic3(x0, y0, z0, dV), cov=cov3, observed=data3) niter = 1000 start = pm.find_MAP(model=basic_model) step = pm.NUTS(scaling=start) trace = pm.sample(niter, start=start, step=step) #n_chains = 100 #n_steps = 50 #tune_interval = 10 #n_jobs = 1 #trace = smc.sample_smc( # n_steps=n_steps, # n_chains=n_chains, # tune_interval=tune_interval, # n_jobs=n_jobs, # #start=start, # progressbar=False, # stage=0,
def n_polyfit_MCMC(n, data, init_guess, n_tuning_steps=1500, n_draws=2500, n_chains=4, nosetest=False, compute_traces=False): """ Fits the data to a polynomial function of degree n using pymc3 Errors on temperature are considered in the model model: temp = C_0 + C_1 * depth + C_2 * depth ^2 + ... + C_n * depth^n — uniform priors on all parameters bounded by Antarctic ice temps Plots the traces in the MCMC (if n_chains > 2) Parameters ---------- data : pandas DataFrame data and metadata contained in pandas DataFrame Format described in tutorial notebook init_guess : dict dictionary containing initial values for each of the parameters in the model (C_0, C_1, C_2)) n_tuning_steps : int (>= 0) number of tuning steps used in MCMC (default = 1500) NOTE: Number of tuning steps must be >= 0 If < 0, n_tuning_steps will automatically be set to the default (1500) n_draws : int (> 0) number of draws used in MCMC (default = 2500) NOTE: n_draws must be >= 4 for convergence checks and > 0 in general If < 1, n_draws will automatically be set to the default (2500) n_chains : int (> 0) number of walkers used to sample posterior in MCMC (default = 5) NOTE: number of chains must be >= 2 to visualize traces and must be > 0 in general If < 1, n_chains will automatically be set to the default (4) nosetest : bool bool that specifies whether or not a test is being conducted if testing is being run, then sampling will not be performed compute_traces : bool bool that indicates wheter or not to compute the traces Returns ------- traces : pymc3 MultiTrace object, OR int (depending on compute_traces) Traces generated from MCMC sampling 0 if compute_traces == False best_fit : dict dictionary containing best-fit parameters and covariance matrix NOTE: when testing, None is returned, as no sampling/inference is performed """ # error checking for MCMC-related parameters # if parameters outside allowed values, set them to the default if n_tuning_steps < 0: print( "You have entered an invalid value for n_tuning_steps (must be >= 0). Reverting to default (1500)" ) n_tuning_steps = 1500 if n_draws < 1: print( "You have entered an invalid value for n_draws (must be >= 1). Reverting to default (2500)" ) n_draws = 2500 if n_chains < 1: print( "You have entered an invalid value for n_chains (must be >= 1). Reverting to default (4)" ) n_chains = 4 # prepare data depth = data['Depth'].values temp = data['Temperature'].values sigma_y = data['temp_errors'].values with pm.Model() as poly_model: # define priors for each parameter in the polynomial fit (e.g C_0 + C_1*x + C_2*x^2 + ...) C_0 = pm.Uniform( 'C_0', -60, -40 ) # not expected to change more than +/- 5 deg C according to base camp measurements C_n = [ pm.Uniform('C_{}'.format(i), -60 / 800**i, 10 / 800**i) for i in range(1, n + 1) ] polynomial = C_0 + np.sum([C_n[i] * depth**(i + 1) for i in range(n)]) # define likelihood sigma_T = 1. y_obs = pm.Normal("temp_pred", mu=polynomial, sd=sigma_T, observed=temp) if not nosetest: with poly_model: # unleash the inference if compute_traces == True: traces = pm.sample( init="adapt_diag", tune=n_tuning_steps, draws=n_draws, chains=n_chains) # need at least two chains to plot traces #az.plot_pair(traces, divergences=True) if n_chains >= 2: az.plot_trace(traces) else: traces = 0 best_fit, scipy_output = pm.find_MAP(start=init_guess, return_raw=True) covariance_matrix = np.flip(scipy_output.hess_inv.todense() / sigma_y[0]) best_fit['covariance matrix'] = covariance_matrix return (traces, best_fit) if not nosetest else None
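# Hypothetical call to n_polyfit_MCMC above. The borehole-style DataFrame and the
# initial guess are fabricated for illustration; only the column names required by
# the function ('Depth', 'Temperature', 'temp_errors') are taken from it.
import numpy as np
import pandas as pd

depth = np.linspace(0, 800, 50)
df = pd.DataFrame({
    'Depth': depth,
    'Temperature': -50.0 + 0.01 * depth + np.random.normal(0, 0.5, depth.size),
    'temp_errors': np.full(depth.size, 0.5),
})
init = {'C_0': -50.0, 'C_1': 0.01}  # parameter names match the n=1 model
traces, best_fit = n_polyfit_MCMC(1, df, init, n_draws=500, n_chains=2,
                                  compute_traces=False)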
def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, random_seed=-1, progressbar=True, **kwargs): """Initialize and sample from posterior of a continuous model. This is a convenience function. NUTS convergence and sampling speed is extremely dependent on the choice of mass/scaling matrix. In our experience, using ADVI to estimate a diagonal covariance matrix and using this as the scaling matrix produces robust results over a wide class of continuous models. Parameters ---------- init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'} Initialization method to use. * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix. * ADVI_MAP: Initialize ADVI with MAP and use MAP as starting point. * MAP : Use the MAP as starting point. * NUTS : Run NUTS and estimate posterior mean and covariance matrix. njobs : int Number of parallel jobs to start. n_init : int Number of iterations of initializer If 'ADVI', number of iterations, if 'metropolis', number of draws. model : Model (optional if in `with` context) progressbar : bool Whether or not to display a progressbar for advi sampling. **kwargs : keyword arguments Extra keyword arguments are forwarded to pymc3.NUTS. Returns ------- start : pymc3.model.Point Starting point for sampler nuts_sampler : pymc3.step_methods.NUTS Instantiated and initialized NUTS sampler object """ model = pm.modelcontext(model) pm._log.info('Initializing NUTS using {}...'.format(init)) random_seed = int(np.atleast_1d(random_seed)[0]) if init is not None: init = init.lower() if init == 'advi': v_params = pm.variational.advi(n=n_init, random_seed=random_seed, progressbar=progressbar) start = pm.variational.sample_vp(v_params, njobs, progressbar=False, hide_transformed=False, random_seed=random_seed) if njobs == 1: start = start[0] cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() v_params = pm.variational.advi(n=n_init, start=start, random_seed=random_seed) cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) elif init == 'nuts': init_trace = pm.sample(step=pm.NUTS(), draws=n_init, random_seed=random_seed)[n_init // 2:] cov = np.atleast_1d(pm.trace_cov(init_trace)) start = np.random.choice(init_trace, njobs) if njobs == 1: start = start[0] else: raise NotImplementedError( 'Initializer {} is not supported.'.format(init)) step = pm.NUTS(scaling=cov, is_cov=True, **kwargs) return start, step
for train_index, test_index in kf.split(X): # prepare train/test batch pred_train, y_train, X_train = pred[train_index], y[ train_index], X[train_index] pred_test, y_test, X_test = pred[test_index], y[test_index], X[ test_index] pred_tt.set_value(pred_train) y_tt.set_value(y_train) X_tt.set_value(X_train) # fit model # with model_spec: # model_fit = pm.fit(n=100000, method=pm.ADVI()) # trace = model_fit.sample(1000) model_fit = pm.find_MAP(model=model_spec) # do prediction w_tr = model_fit["w"] # np.mean(trace["w"], axis=0) w_cv = ensemble_pred(X_test, X_train, model_est=model_fit, P=P, ls=ls) w_or = ensemble_pred(X_pred, X_train, model_est=model_fit, P=P, ls=ls) # training error
p_grid, posterior = posterior_grid_approx(ps, w, n) ax[idx].plot(p_grid, posterior, "o-", label=f"success = {w}\ntosses = {n}") ax[idx].set_xlabel("probability of water") ax[idx].set_ylabel("posterior probability") ax[idx].set_title(f"{ps} points") ax[idx].legend(loc=0) # %% data = np.repeat((0, 1), (3, 6)) # %% # %% with pm.Model() as normal_approximation: p = pm.Uniform("p", 0, 1) w = pm.Binomial("w", n=len(data), p=p, observed=data.sum()) mean_q = pm.find_MAP() std_q = ((1 / pm.find_hessian(mean_q, vars=[p]))**0.5)[0] mean_q["p"], std_q # %% w, n = 6, 9 x = np.linspace(0, 1, 100) plt.plot(x, stats.beta.pdf(x, w + 1, n - w + 1), label="True posterior") # quadratic approximation plt.plot(x, stats.norm.pdf(x, mean_q["p"], std_q), label="Quadratic approximation") plt.legend(loc=0) plt.title(f"n = {n}")
    atts_star = pm3.Normal("atts_star", mu=0, tau=tau_att, shape=num_teams)
    defs_star = pm3.Normal("defs_star", mu=0, tau=tau_def, shape=num_teams)

    atts = pm3.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm3.Deterministic('defs', defs_star - tt.mean(defs_star))
    home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm3.Poisson('home_points', mu=home_theta,
                              observed=observed_home_goals)
    away_points = pm3.Poisson('away_points', mu=away_theta,
                              observed=observed_away_goals)

# * We specified the model and the likelihood function
# * Now we need to fit our model using the Maximum A Posteriori algorithm
#   to decide where to start our No-U-Turn Sampler

# In[6]:

with model:
    start = pm3.find_MAP()
    step = pm3.NUTS(scaling=start)
    trace = pm3.sample(2000, step, start=start, progressbar=True)

pm3.traceplot(trace)

# In[ ]:
def test_run(self):
    with self.build_model():
        start = pm.find_MAP(method="Powell")
        pm.sample(50, pm.Slice(), start=start)
from plot_post import plot_post # Generate the data y1 = np.array([1, 1, 1, 1, 1, 0, 0]) # 5 heads and 2 tails y2 = np.array([1, 1, 0, 0, 0, 0, 0]) # 2 heads and 5 tails with pm.Model() as model: # define the prior theta1 = pm.Beta('theta1', 3, 3) # prior theta2 = pm.Beta('theta2', 3, 3) # prior # define the likelihood y1 = pm.Bernoulli('y1', p=theta1, observed=y1) y2 = pm.Bernoulli('y2', p=theta2, observed=y2) # Generate a MCMC chain start = pm.find_MAP() # Find starting value by optimization trace = pm.sample(10000, pm.Metropolis(), progressbar=False) # Use Metropolis sampling # start = pm.find_MAP() # Find starting value by optimization # step = pm.NUTS() # Instantiate NUTS sampler # trace = pm.sample(10000, step, start=start, progressbar=False) # create an array with the posterior sample theta1_sample = trace['theta1'] theta2_sample = trace['theta2'] # Plot the trajectory of the last 500 sampled values. plt.plot(theta1_sample[:-500], theta2_sample[:-500], marker='o') plt.xlim(0, 1) plt.ylim(0, 1) plt.xlabel(r'$\theta1$')
x_dim, y_dim = image.shape pixel_values = np.concatenate(image) # grey scale between 0 and 1 N, wmat, amat = create_matrices(x_dim, y_dim) with pm.Model() as model: beta0 = pm.Normal('beta0', mu=0., tau=1e-2) tau = pm.Gamma('tau_c', alpha=1.0, beta=1.0) mu_phi = CAR2('mu_phi', w=wmat, a=amat, tau=tau, shape=N) phi = pm.Deterministic('phi', mu_phi - tt.mean(mu_phi)) # zero-center phi mu = pm.Deterministic('mu', beta0 + phi) Yi = pm.LogitNormal('Yi', mu=mu, observed=pad(pixel_values)) max_a_post = pm.find_MAP() step = pm.NUTS() trace = pm.sample(draws=N_SAMPLES, step=step, start=max_a_post, cores=2, tune=N_TUNE, chains=N_CHAINS) posterior_pred = pm.sample_posterior_predictive(trace) prefix_file_name = 'mnist_digit{}(label{})_'.format(i, label) np.save( new_name(name=prefix_file_name + 'phi_values', suffix='.npy', directory=DIRECTORY), trace.get_values('phi'))
# wide variance = less information for prior, data is stronger # alpha=100, beta=1: QUITE CLOSE to observed data! Also more stable? c = pm.Normal('c', mu=hyper_mu, sd=hyper_sd, shape=(config.K, config.K)) # mask = np.ones((config.K, config.K)) # np.fill_diagonal(mask, 0) # p = pm.Deterministic('p', config.sigmoid(a + b + offset)) # p = config.sigmoid(a + b + offset) #p = config.sigmoid(a + b + c) p = config.sigmoid(c) # Likelihood (sampling distribution) of observations pm.Binomial('L', n=N_data, p=p, observed=s_with_obs) # MAP MAP = pm.find_MAP(model=model) # Find starting point of MCMC #a_ = np.tile(np.squeeze(MAP['a']), (4, 1)).T #b_ = np.tile(np.squeeze(MAP['b']), (4, 1)) c_ = np.squeeze(MAP['c']) #intercept_ = MAP['intercept'] # y_MAP = config.sigmoid(a_ + b_ + c_ + intercept_) y_MAP = config.sigmoid(c_) #tic() # Draw posterior samples with model: # THIS TAKES A WHILE TO RUN! trace = pm.sample(1000, nuts_kwargs=dict(target_accept=.9, max_treedepth=20), chains=config.N_MCMC_CHAINS) #toc()
T0= Uniform('T0',0,24) tau= Gamma('tau',0.0001, 0.0001) mu_temp= c*T*((T-T0)*(T0<T))*np.sqrt((Tm-T)*(Tm>T)) mu= 0*(mu_temp<0) + mu_temp*(mu_temp>0) Y_obs = Normal('Y_obs',mu=mu, sd=tau, observed= Y) from pymc3 import Metropolis, sample, find_MAP from scipy import optimize with basic_model_GCR: # obtain starting values via MAP start = find_MAP(fmin=optimize.fmin_powell) # draw 5000 posterior samples trace= sample(sample_size, step= Metropolis(), start=start) #thin the samples by selecting every 5 samples thin_factor=5 #summary(trace) #traceplot(trace);
def find_MAP(self, start=None, points=1, plot=False, return_points=False, display=True, powell=True): points_list = list() if start is None: start = self.get_params_current() if type(start) is list: i = 0 for s in start: i += 1 points_list.append(('start' + str(i), self.model.logp(s), s)) else: points_list.append(('start', self.model.logp(start), start)) if self.outputs.get_value() is None: print('For find_MAP it is necessary to have observations') return start if display: print('Starting function value (-logp): ' + str(-self.model.logp(points_list[0][2]))) if plot: plt.figure(0) self.plot(params=points_list[0][2], title='start') plt.show() with self.model: i = -1 while i < points: i += 1 try: if powell: name, logp, start = points_list[i // 2] else: name, logp, start = points_list[i] if i % 2 == 0 or not powell: # if name.endswith('_bfgs'): if i > 0: points += 1 continue name += '_bfgs' if display: print('\n' + name) new = pm.find_MAP(fmin=sp.optimize.fmin_bfgs, vars=self.sampling_vars, start=start, disp=display) else: if name.endswith('_powell'): if i > 1: points += 1 continue name += '_powell' if display: print('\n' + name) new = pm.find_MAP(fmin=sp.optimize.fmin_powell, vars=self.sampling_vars, start=start, disp=display) points_list.append((name, self.model.logp(new), new)) if plot: plt.figure(i + 1) self.plot(params=new, title=name) plt.show() except: pass optimal = points_list[0] for test in points_list: if test[1] > optimal[1]: optimal = test name, logp, params = optimal if display: #print(params) pass if return_points is False: return params else: return params, points_list
def get_posterior(data, n=100, draws=2000, n_init=200000, progressbar=True, *args, **kwargs): with pm.Model() as model: # Define Priors p_err = pm.Uniform('p_err', 0, 0.1) # Upper limit due to normalization p_ent = pm.Uniform('p_ent', 0, 1 - 6 * p_err) p_a = pm.Uniform('p_a', 0, 1 - 6 * p_err - p_ent) p_e = pm.Uniform('p_e', 0, 1 - 6 * p_err - p_ent) p_o = pm.Uniform('p_o', 0, 1 - 6 * p_err - p_ent) p_i = pm.Uniform('p_i', 0, 1 - 6 * p_err - p_ent) nvc_a = pm.Deterministic('nvc_a', 1 - p_a - 6 * p_err - p_ent) nvc_i = pm.Deterministic('nvc_i', 1 - p_i - 6 * p_err - p_ent) nvc_e = pm.Deterministic('nvc_e', 1 - p_e - 6 * p_err - p_ent) nvc_o = pm.Deterministic('nvc_o', 1 - p_o - 6 * p_err - p_ent) # Model specification: define all possible moods # syll tt-syntax a i e o NVC aa = [p_a, p_ent, p_err, p_err, nvc_a] ai = [p_err, p_a, p_err, p_ent, nvc_a] ia = ai ae = [p_err, p_err, p_a, p_ent, nvc_a] ea = ae ao = [p_err, p_ent, p_err, p_a, nvc_a] oa = ao ii = [p_err, p_i, p_err, p_ent, nvc_i] ie = [p_err, p_err, p_i, p_ent, nvc_i] ei = ie io = [p_err, p_ent, p_err, p_i, nvc_i] oi = io ee = [p_err, p_err, p_e, p_ent, nvc_e] eo = [p_err, p_ent, p_err, p_e, nvc_e] oe = eo oo = [p_err, p_ent, p_err, p_o, nvc_o] # Define the relationship between moods and syllogisms moods = [ aa, ai, ae, ao, ia, ii, ie, io, ea, ei, ee, eo, oa, oi, oe, oo ] syllogs = [] for m in moods: # Figure 1 line = m[0:4] + [p_err] * 4 + [m[-1]] syllogs += [line] # Figure 2 line = [p_err] * 4 + m[0:4] + [m[-1]] syllogs += [line] line = [] for para in m[0:4]: if para == p_err: line += [p_err] else: line += [para / 2] # Paste this two times line *= 2 # Add NVC line += [m[-1]] syllogs += [line] * 2 model_matrix = tt.stack(syllogs) # Define likelihood pm.Multinomial(name='rates', n=n, p=model_matrix, observed=data) map_estimate = pm.find_MAP(model=model) trace = pm.sample(draws=draws, njobs=1, start=map_estimate, n_init=n_init, progressbar=progressbar) print('Model logp = ', model.logp(map_estimate)) return model, trace
import pymc3 as pm basic_model = pm.Model() with basic_model: # Priors for unknown model parameters alpha = pm.Normal('alpha', mu=0, sd=10) beta = pm.Normal('beta', mu=0, sd=10, shape=2) sigma = pm.HalfNormal('sigma', sd=1) # Expected value of outcome mu = alpha + beta[0] * X1 + beta[1] * X2 # Likelihood (sampling distribution) of observations Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y) map_estimate = pm.find_MAP(model=basic_model) # map_estimate = pm.find_MAP(model=basic_model, fmin=optimize.fmin_powell) print(map_estimate) alpha1 = map_estimate['alpha'] beta1 = map_estimate['beta'] sigma1 = map_estimate['sigma'] yp = (alpha1 + beta1[0] * TX1 + beta1[1] * TX2) # print(TY) # print(yp) plt.plot(FY, color="green") plt.plot(yp, color="pink") plt.show()
def calc_MAP(mcmc: MCMCSpec, model):
    with model:
        return pm.find_MAP()
eta=2, sd_dist=sd_dist) chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed) cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T)) # observations x1, x0 are supposed to be P(x|y=class1)=N(mu1,cov_both), P(x|y=class0)=N(mu0,cov_both) # here is where the Dataset (x1,x0) comes to influence the choice of paramters (mu1,mu0, cov_both) # this is done through the "observed = ..." argument; note that above we didn't have that x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1) x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0) # done with setting up the model # now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation # map_estimate1 is a dictionary: "parameter name" -> "it's estimated value" map_estimate1 = pm.find_MAP(model=basic_model) #compare map_estimate1['estimated_mu1'] with true_mu1 #same for mu_2, cov # we can also do MCMC sampling from the distribution over the parameters # and e.g. get confidence intervals # with basic_model: # obtain starting values via MAP start = pm.find_MAP() # instantiate sampler step = pm.Slice()
) #account for removal of burn in at the beginning of each chain with pm.Model() as model: # define priors # Based on P. Barbera's work we know ideology scores tend to be normally distributed around 0 # For standard deviation, typically an exponential distribution is used mu = pm.Normal('mu', mu=0, sigma=2, shape=sample_mu.shape) sigma = pm.Exponential('sigma', lam=2, shape=sample_sigma.shape) # define likelihood observed_data = pm.Normal('observed_data', mu=mu, sigma=sigma, observed=samples) # inference map_estimate = pm.find_MAP() step = pm.NUTS(target_accept=0.90) trace = pm.sample(draws=niter, start=None, init='advi_map', step=step, random_seed=323, cores=n_cores, chains=n_cores) print("Done with inference!") # Get samples of population-level posterior for use as prior in individual-level inference later del samples posterior_mu = trace.get_values('mu', burn=burn_in, combine=True) posterior_sigma = trace.get_values('sigma', burn=burn_in, combine=True)
plt.scatter(x, y, marker='+', c='r') plt.plot(x, true_y, 'b') plt.show() # pymc modeling with pm.Model() as model: amp = pm.HalfCauchy("amp", 1) ls = pm.HalfCauchy("ls", 1) cov_func = amp**2 * pm.gp.cov.ExpQuad(1, ls) # input_dim=1,ls=ls M = pm.gp.mean.Linear(coeffs=(y / x).mean()) gp = pm.gp.Marginal(M, cov_func) noise = pm.HalfCauchy("noise", 2) gp.marginal_likelihood("f", X=x.reshape(-1, 1), y=y, noise=noise) trace = pm.sample(1000, chains=1) map_ = pm.find_MAP(model=model) X_new = np.linspace(0, np.pi * 2, 150).reshape(-1, 1) # .predict method: return the mean and variance given a particular point mu, var = gp.predict(X_new, point=map_, diag=True, pred_noise=True) sd = np.sqrt(var) # plot # draw plot plt.figure(figsize=(4, 3)) # plot mean and 2σ intervals plt.ylim(-2, 2) plt.xlim(0, np.pi * 2) plt.plot(X_new, mu, lw=2, c='r', label="mean and 2σ region") plt.plot(X_new, mu - 2 * sd, lw=1, c='r') plt.plot(X_new, mu + 2 * sd, lw=1, c='r')
dislike = pm.Poisson('dislike', mu=lambda_minus, observed=df_videos['低評価数']) trace = pm.sample(1500, tune=3000, chains=5, random_seed=57) # - pm.traceplot(trace) # + df_trace = pm.summary(trace) df_trace # - model_map = pm.find_MAP(model=model) model_map df_trace.loc['fun[0]':'beta_plus', ['mean']].sort_values('mean', ascending=False) # + df_videos['fun'] = model_map['fun'] df_videos = df_videos.sort_values(by='fun', ascending=False) print('top 5 fun videos!') display(df_videos.head(5)) print('worst 5 fun videos...') display(df_videos.tail(5))
observedRenewed = data[0, :] observedReleased = data[1, :] # Released entries every year released = tt.mul(p[1:].log(), observedReleased[1:]) # Renewed entries every year renewed = s[-1].log() * observedRenewed[-1] return released.sum() + renewed retention = pm.DensityDist('retention', logp, observed=data) step = pm.DEMetropolis() trace = pm.sample(10000, step=step, tune=2000) # Maximum a posteriori estimators for the model mapValues = pm.find_MAP(model=BdWwithcfromNorm) # Extract alpha and beta MAP-estimators betaParams = mapValues.get('alpha').item(), mapValues.get('beta').item() theta = stats.beta.mean(betaParams[0], betaParams[1]) cHat = mapValues.get('c').item() rvar = stats.beta.var(betaParams[0], betaParams[1]) # Define a Discrete Weibull distribution def DiscreteWeibull(q, b, x): return (1 - q)**(x**b) - (1 - q)**((x + 1)**b) # Plot stuff
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of initializer.
        If 'advi', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False,
                                         hide_transformed=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])
        start = {
            varname: np.mean(init_trace[varname])
            for varname in init_trace.varnames
        }
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
def init_nuts(init='auto', chains=1, n_init=500000, model=None, random_seed=None, progressbar=True, **kwargs): """Set up the mass matrix initialization for NUTS. NUTS convergence and sampling speed is extremely dependent on the choice of mass/scaling matrix. This function implements different methods for choosing or adapting the mass matrix. Parameters ---------- init : str Initialization method to use. * auto : Choose a default initialization method automatically. Currently, this is `'jitter+adapt_diag'`, but this can change in the future. If you depend on the exact behaviour, choose an initialization method explicitly. * adapt_diag : Start with a identity mass matrix and then adapt a diagonal based on the variance of the tuning samples. All chains use the test value (usually the prior mean) as starting point. * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter in [-1, 1] to the starting point in each chain. * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal mass matrix based on the sample variance of the tuning samples. * advi+adapt_diag_grad : Run ADVI and then adapt the resulting diagonal mass matrix based on the variance of the gradients during tuning. This is **experimental** and might be removed in a future release. * advi : Run ADVI to estimate posterior mean and diagonal mass matrix. * advi_map: Initialize ADVI with MAP and use MAP as starting point. * map : Use the MAP as starting point. This is discouraged. * nuts : Run NUTS and estimate posterior mean and mass matrix from the trace. chains : int Number of jobs to start. n_init : int Number of iterations of initializer If 'ADVI', number of iterations, if 'nuts', number of draws. model : Model (optional if in `with` context) progressbar : bool Whether or not to display a progressbar for advi sampling. **kwargs : keyword arguments Extra keyword arguments are forwarded to pymc3.NUTS. Returns ------- start : pymc3.model.Point Starting point for sampler nuts_sampler : pymc3.step_methods.NUTS Instantiated and initialized NUTS sampler object """ model = pm.modelcontext(model) vars = kwargs.get('vars', model.vars) if set(vars) != set(model.vars): raise ValueError('Must use init_nuts on all variables of a model.') if not pm.model.all_continuous(vars): raise ValueError('init_nuts can only be used for models with only ' 'continuous variables.') if not isinstance(init, str): raise TypeError('init must be a string.') if init is not None: init = init.lower() if init == 'auto': init = 'jitter+adapt_diag' pm._log.info('Initializing NUTS using {}...'.format(init)) if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) np.random.seed(random_seed) cb = [ pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'), pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'), ] if init == 'adapt_diag': start = [model.test_point] * chains mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) potential = quadpotential.QuadPotentialDiagAdapt( model.ndim, mean, var, 10) elif init == 'jitter+adapt_diag': start = [] for _ in range(chains): mean = {var: val.copy() for var, val in model.test_point.items()} for val in mean.values(): val[...] 
+= 2 * np.random.rand(*val.shape) - 1 start.append(mean) mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) potential = quadpotential.QuadPotentialDiagAdapt( model.ndim, mean, var, 10) elif init == 'advi+adapt_diag_grad': approx = pm.fit( random_seed=random_seed, n=n_init, method='advi', model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, ) # type: pm.MeanField start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds)**2 mean = approx.bij.rmap(approx.mean.get_value()) mean = model.dict_to_array(mean) weight = 50 potential = quadpotential.QuadPotentialDiagAdaptGrad( model.ndim, mean, cov, weight) elif init == 'advi+adapt_diag': approx = pm.fit( random_seed=random_seed, n=n_init, method='advi', model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, ) # type: pm.MeanField start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds)**2 mean = approx.bij.rmap(approx.mean.get_value()) mean = model.dict_to_array(mean) weight = 50 potential = quadpotential.QuadPotentialDiagAdapt( model.ndim, mean, cov, weight) elif init == 'advi': approx = pm.fit(random_seed=random_seed, n=n_init, method='advi', model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window) # type: pm.MeanField start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds)**2 potential = quadpotential.QuadPotentialDiag(cov) elif init == 'advi_map': start = pm.find_MAP(include_transformed=True) approx = pm.MeanField(model=model, start=start) pm.fit(random_seed=random_seed, n=n_init, method=pm.KLqp(approx), callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window) start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds)**2 potential = quadpotential.QuadPotentialDiag(cov) elif init == 'map': start = pm.find_MAP(include_transformed=True) cov = pm.find_hessian(point=start) start = [start] * chains potential = quadpotential.QuadPotentialFull(cov) elif init == 'nuts': init_trace = pm.sample(draws=n_init, step=pm.NUTS(), tune=n_init // 2, random_seed=random_seed) cov = np.atleast_1d(pm.trace_cov(init_trace)) start = list(np.random.choice(init_trace, chains)) potential = quadpotential.QuadPotentialFull(cov) else: raise NotImplementedError( 'Initializer {} is not supported.'.format(init)) step = pm.NUTS(potential=potential, model=model, **kwargs) return start, step
y = np.repeat([1, 0], [z, N - z]) # THE MODEL. with pm.Model() as model: # Hyperprior on model index: model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) # Prior nu = pm.Normal('nu', mu=0, tau=0.1) # it is posible to use tau or sd eta = pm.Gamma('eta', .1, .1) theta0 = 1 / (1 + pm.exp(-nu)) # theta from model index 0 theta1 = pm.exp(-eta) # theta from model index 1 theta = pm.switch(pm.eq(model_index, 0), theta0, theta1) # Likelihood y = pm.Bernoulli('y', p=theta, observed=y) # Sampling start = pm.find_MAP() step1 = pm.Metropolis(model.vars[1:]) step2 = pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1]) trace = pm.sample(10000, [step1, step2], start=start, progressbar=False) # EXAMINE THE RESULTS. burnin = 1000 thin = 5 ## Print summary for each trace #pm.summary(trace[burnin::thin]) #pm.summary(trace) ## Check for mixing and autocorrelation #pm.autocorrplot(trace[burnin::thin], vars =[nu, eta]) #pm.autocorrplot(trace, vars =[nu, eta])
vert_des2 = strSource2 * coeff_up_des2 east_des = east_des1 + east_des2 north_des = north_des1 + north_des2 vert_des = vert_des1 + vert_des2 Ulos_des = east_des * UEast_des + north_des * UNorth_des + vert_des * UVert_des UObs_asc = mv.MvNormal('Uobs_asc', mu=Ulos_asc, cov=covariance_asc, observed=U_asc) UObs_des = mv.MvNormal('Uobs_des', mu=Ulos_des, cov=covariance_des, observed=U_des) step = pm.Metropolis() trace = pm.sample(Niter, step) trace = trace[Nburn:] #Discard first 1000 samples of each chain print(pm.summary(trace)) map_estimate = pm.find_MAP(model=model) print(map_estimate) results = {} results['MAP'] = map_estimate results['trace'] = trace results['ref_coord'] = data['ref_coord'] results['iterations'] = Niter pickle.dump( results, open(pathgg_results + 'Mogi_Metropolis_' + str(Niter) + '_2mogi.pickle', 'wb'))
a = pm.Normal( "a", 0.0, 10.0, transform=pm.distributions.transforms.ordered, shape=6, testval=np.arange(6) - 2.5, ) resp_obs = pm.OrderedLogistic( "resp_obs", 0.0, a, observed=trolley_df.response.values - 1 ) # %% with m11_1: map_11_1 = pm.find_MAP() # %% map_11_1["a"] # %% sp.special.expit(map_11_1["a"]) # %% with m11_1: trace_11_1 = pm.sample(1000, tune=1000) # %% def ordered_logistic_proba(a):
def model(sim_data, prior_data, keys=[], out_fold='test'): #x = tt.as_tensor(np.ones(4)) #y = tt.as_tensor(np.ones(3)) #z = tt.concatenate([x,y],axis=0) #print z.eval() for file in os.listdir(out_fold): name = "%s/%s" % (out_fold, file) os.remove(name) #print file mod = pm.Model() with mod: #probailities of each primitive ps = [] #weights = pm.Dirichlet("weights", np.ones(len(prim_type))) for i in xrange(len(prim_type)): #weight = weights[i] #weight = 1. #prob = np.ones(prim_type[i])/float(prim_type[i]) name = "p_%s" % i name_w = name + "_w" weight = pm.Exponential(name_w, 1.0) * np.ones(prim_type[i]) prob = pm.Dirichlet(name, np.ones(prim_type[i])) ps.append(weight * prob) probs = tt.concatenate(ps, axis=0) #probs = np.ones(N_PRIM)/float(N_PRIM) #copy the probability vector a number of times #so that it becomes a tensor #ith that the probabilities of each primtive #for each hypothesis for each sequence pair probs = tt.tile(probs, (N_TOP, 1)) probs = tt.tile(probs, (N_PAIRS, 1, 1)) #and now convert the probabilities assigned #to each hypothesis for a given sequence pair #into entropy ents = tt.pow(probs, prior_data) ents = tt.log(ents) """ x1 = tt.sum(ents,axis=2) x2 = tt.exp(x1) norms = tt.sum(x2) x2 = x2/norms ents = -1.0 * x1 * x2 ents = tt.sum(ents, axis=1) """ ents = tt.sum(ents, axis=2) ents = tt.max(ents, axis=1) #ents = tt.exp(ents) ents = pm.Deterministic('ents', ents) mean_pr = tt.mean(ents) #ents = ents - mean_pr std_pr = tt.std(ents) ents = (ents - mean_pr) / std_pr ents = ents[assigns] #intercept #alpha = pm.Uniform('alpha', 0,1) * 5. alpha = pm.Normal('alpha', mu=3, sd=1) #slope beta = pm.Normal('beta', mu=0.0, sd=10) #standard deviation sigma = pm.HalfNormal('sigma', sd=1) #expected value of similarity mu = alpha + beta * ents #compare fit to observed similarity data Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=sim_data_lst) db = pm.backends.Text(out_fold) trace = pm.sample(MCMC_STEPS, tune=BURNIN, thin=MCMC_THIN, trace=db) map_estimate = pm.find_MAP(model=mod) print map_estimate c = 0 if len(keys) > 0: for z in map_estimate['ents']: print keys[c], z c += 1 return trace
ydata = theta_true[0] + theta_true[1] * xdata # add scatter to points xdata = np.random.normal(xdata, 10) ydata = np.random.normal(ydata, 10) data = {'x': xdata, 'y': ydata} with pymc3.Model() as model: alpha = pymc3.Uniform('intercept', -100, 100) # Create custom densities beta = pymc3.DensityDist('slope', lambda value: -1.5 * T.log(1 + value**2), testval=0) sigma = pymc3.DensityDist('sigma', lambda value: -T.log(T.abs_(value)), testval=1) # Create likelihood like = pymc3.Normal('y_est', mu=alpha + beta * xdata, sd=sigma, observed=ydata) start = pymc3.find_MAP() step = pymc3.NUTS(scaling=start) # Instantiate sampler trace = pymc3.sample(10000, step, start=start) ################################################# # Create some convenience routines for plotting # All functions below written by Jake Vanderplas def compute_sigma_level(trace1, trace2, nbins=20): """From a set of traces, bin by number of standard deviations""" L, xbins, ybins = np.histogram2d(trace1, trace2, nbins) L[L == 0] = 1E-16 logL = np.log(L) shape = L.shape
### END OF NEW FOR COUPLED CALIBRATION WITH DLM-GASP # Likelihood (sampling distribution) of observations # #y_obs = pm.Normal('y_obs', mu=muGP[0], sd=sigma, observed=0) y_obs = pm.Normal('y_obs', mu=mu, sd=sigma, observed=0) #y_obs = pm.Normal('y_obs', mu=mu,sd=sigma, observed=90) #y = pm.DensityDist('y', logp(var1,var2,var3)) #basic_model.logp({'y': 0.}) with basic_model: print "Starting ...." start = pm.find_MAP( fmin=optimize.fmin_powell) #=optimize.fmin_powell) #start = {'var1': 0.5, 'var2': 0.5, 'var3': 0.5, 'var4': 0.5, 'var5': 0.5, 'var6': 0.5, 'var7': 0.5, 'var8': 0.5, 'var9': 0.5, 'var10': 0.5, 'var11': 0.5, 'var12': 0.5, 'var13': 0.5, 'var14': 0.5, 'var15': 0.5, 'var16': 0.5, 'var17': 0.5, 'var18': 0.5, 'var19': 0.5, 'var20': 0.5, 'var21': 0.5, 'sigma': 4000} #C = approx_hessian(model.test_point) #step = pm.HamiltonianMC([var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21,sigma,err]) print "Assigning step method...." #step1 = pm.Metropolis(vars=[var1,var2,var3,var4,var5,var6,var7,var8,var9,var10,var11,var12,var13,var14,var15,var16,var17,var18,var19,var20,var21]) #step2 = pm.Metropolis(vars=[muGP,sigma]) step = pm.Metropolis() #step = pm.NUTS() print "Running sample algorightm...." trace = pm.sample(7500, tune=500, step=step, njobs=1) #tune=100 pm.traceplot(trace) pm.backends.text.dump(os.getcwd(), trace) for csvfile in glob.glob(os.path.join('.', 'chain-0.csv')): worksheet = workbook.add_worksheet('Case ' + str(case))
def fit( self, draws: int = 500, chains: int = 4, trace_size: int = 500, method: Sampler = Sampler.NUTS, map_initialization: bool = False, finalize: bool = True, step_kwargs: Dict = None, sample_kwargs: Dict = None, ): """Fit the PMProphet model. Parameters ---------- draws : int, > 0 The number of MCMC samples. chains: int, =4 The number of MCMC draws. trace_size: int, =1000 The last N number of samples to keep in the trace method : Sampler The sampler of your choice map_initialization : bool Initialize the model with maximum a posteriori estimates. finalize : bool Finalize the model. step_kwargs : dict Additional arguments for the sampling algorithms (`NUTS` or `Metropolis`). sample_kwargs : dict Additional arguments for the PyMC3 `sample` function. """ if sample_kwargs is None: sample_kwargs = {} if step_kwargs is None: step_kwargs = {} if chains * draws < trace_size and method != Sampler.ADVI: raise Exception( "Desired trace size should be smaller than the sampled data points" ) self.skip_first = (chains * draws) - trace_size if method != Sampler.ADVI else 0 self.chains = chains if finalize: self.finalize_model() with self.model: if map_initialization: self.start = pm.find_MAP(maxeval=10000) if draws == 0: self.trace = { k: np.array([v]) for k, v in self.start.items() } if draws: if method != Sampler.ADVI: step_method = method.value(**step_kwargs) self.trace = pm.sample( draws, chains=chains, step=step_method, start=self.start if map_initialization else None, **sample_kwargs) else: res = pm.fit( draws, start=self.start if map_initialization else None) self.trace = res.sample(trace_size)
alpha_A = 400.0 / 16.0 beta_A = 1.0 / 16.0 alpha_N = 400.0 / 16.0 beta_N = 1.0 / 16.0 alpha_D = 2.0 + 1.0 / 1.6 beta_D = 100 * (alpha_D - 1) delta_t = 0.802 with pm.Model() as model: D = pm.InverseGamma('D', alpha=alpha_D, beta=beta_D) A = pm.Gamma('A', alpha=alpha_A, beta=beta_A) B = pm.Deterministic('B', pm.exp(-delta_t * D / A)) path = lcm.Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=time_series) start = pm.find_MAP(fmin=sp.optimize.fmin_powell) trace = pm.sample(100000, start=start) pm.summary(trace) data_dict = { 'D': trace['D'], 'A': trace['A'], 'B': trace['B'], } df = pd.DataFrame(data_dict) df.to_csv(datadir + 'LIG' + region + str(voxel) + '.csv', index=False) pm.traceplot(trace)