def test_value_n_eff_rhat(self):
    """Check that the summary table agrees with the stand-alone diagnostics.

    For every variable in the trace (0-d to 3-d), the ``n_eff`` and ``Rhat``
    columns of ``pm.summary`` must equal the values returned by
    ``pm.effective_n`` and ``pm.gelman_rubin``.
    """
    mu = -2.1
    tau = 1.3
    with Model():
        Normal('x0', mu, tau, testval=floatX_array(.1))  # 0d
        Normal('x1', mu, tau, shape=2,
               testval=floatX_array([.1, .1]))  # 1d
        Normal('x2', mu, tau, shape=(2, 2),
               testval=floatX_array(np.tile(.1, (2, 2))))  # 2d
        Normal('x3', mu, tau, shape=(2, 2, 3),
               testval=floatX_array(np.tile(.1, (2, 2, 3))))  # 3d
        trace = pm.sample(100, step=pm.Metropolis())

        for varname in trace.varnames:
            # Build the summary once per variable; both checks read from it.
            summary_df = pm.summary(trace, varnames=[varname])

            # test effective_n value
            n_eff = pm.effective_n(trace, varnames=[varname])[varname]
            n_eff_df = np.asarray(summary_df['n_eff']).reshape(n_eff.shape)
            npt.assert_equal(n_eff, n_eff_df)

            # test Rhat value
            rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
            rhat_df = np.asarray(summary_df['Rhat']).reshape(rhat.shape)
            npt.assert_equal(rhat, rhat_df)
def run(n=5000):
    """Sample ``model_1`` with a slice sampler and summarise the trace.

    Args:
        n: Number of draws to sample. Previously this argument was accepted
            but ignored in favour of a hard-coded 5000.
    """
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        trace = pm.sample(n, xstep, xstart, random_seed=123,
                          progressbar=True)
        pm.summary(trace)
def test_summary_1d_variable_model():
    """Smoke test: ``pm.summary`` runs on a trace of a length-2 variable.

    There are no assertions; the test passes when nothing raises.
    """
    mu = -2.1
    tau = 1.3
    with Model() as model:
        # The variable only needs to be registered on the model; the returned
        # handle is not used, so it is no longer bound to a local name.
        Normal('x', mu, tau, shape=2, testval=[.1, .1])
        step = Metropolis(model.vars, np.diag([1.]), blocked=True)
        trace = pm.sample(100, step=step)
        pm.summary(trace)
def test_summary_0d_variable_model(self):
    """Smoke test: ``pm.summary`` runs on a trace of a scalar variable.

    No assertions are made; the test passes when nothing raises.
    """
    mu, tau = -2.1, 1.3
    with Model() as model:
        Normal('x', mu, tau, testval=.1)
        sampler = Metropolis(model.vars, np.diag([1.]), blocked=True)
        draws = pm.sample(100, step=sampler)
        pm.summary(draws)
def test_disaster_model_missing(self):
    """Smoke test: sample and summarise the masked disaster model."""
    model = build_disaster_model(masked=True)
    with model:
        # Starting values for the stochastic nodes.
        initial_point = {'early_mean': 2., 'late_mean': 3.}
        # Slice-sample the two (log-transformed) means; the remaining
        # variables get automatically selected step methods.
        sliced_vars = [model.early_mean_log_, model.late_mean_log_]
        slice_step = pm.Slice(sliced_vars)
        tr = pm.sample(500, tune=50, start=initial_point, step=slice_step)
        pm.summary(tr)
def test_summary_2d_variable_model(self):
    """Smoke test: ``pm.summary`` runs on a trace of a 2x2 variable.

    No assertions are made; the test passes when nothing raises.
    """
    mu, tau = -2.1, 1.3
    start_value = floatX_array(np.tile(.1, (2, 2)))
    with Model() as model:
        Normal('x', mu, tau, shape=(2, 2), testval=start_value)
        sampler = Metropolis(model.vars, np.diag([1.]), blocked=True)
        draws = pm.sample(100, step=sampler)
        pm.summary(draws)
def still_broken(self):
    """Run the ATMCMC sampler on the model from ``build_model`` and summarise.

    The first deterministic of the model is handed to the step method as the
    likelihood; samples are written to ``self.trace_dir``.
    """
    model = self.build_model()
    with model:
        likelihood_name = model.deterministics[0].name
        step = pm.ATMCMC(n_chains=500, tune_interval=25,
                         likelihood_name=likelihood_name)
        sampler_options = dict(
            n_steps=50,
            step=step,
            njobs=1,
            progressbar=True,
            trace=self.trace_dir,
        )
        trace = pm.ATMIP_sample(**sampler_options)
        pm.summary(trace)
def test_save_and_load_work_correctly(self):
    """Fit, save and reload the HLR model, then compare it with the original.

    The reloaded model must report the same ``num_cats`` / ``num_pred``, an
    equal trace summary, and predictions that match to one decimal place.
    """
    print("")
    fitted = self.test_HLR
    fitted.fit(self.X_train, self.Y_train, self.cat_train)
    probs1 = fitted.predict_proba(self.X_test, self.cat_test)
    fitted.save(self.test_dir)

    HLR2 = HLR()
    HLR2.load(self.test_dir)

    for attribute in ("num_cats", "num_pred"):
        self.assertEqual(getattr(self.test_HLR, attribute),
                         getattr(HLR2, attribute))
    # NOTE(review): if `summary` returns a pandas DataFrame in the PyMC3
    # version used here, assertEqual on two DataFrames raises "truth value is
    # ambiguous"; pd.testing.assert_frame_equal would be needed - confirm.
    self.assertEqual(summary(self.test_HLR.advi_trace),
                     summary(HLR2.advi_trace))

    probs2 = HLR2.predict_proba(self.X_test, self.cat_test)
    np.testing.assert_almost_equal(probs2, probs1, decimal=1)
def acfplot_withsummary(axs, lc, trace, summary_kwargs=None, acf_kwargs=None):
    """Draw an ACF plot on ``axs[0]`` and a posterior summary table on ``axs[1]``.

    Args:
        axs: Indexable pair of axes; ``axs[0]`` gets the ACF plot, ``axs[1]``
            has both axes hidden and hosts the table.
        lc: Object forwarded to ``plotacf``.
        trace: Trace summarised with ``pm.summary`` and ``utils.modes``.
        summary_kwargs: Optional keyword arguments forwarded to
            ``pm.summary``. Previously accepted but silently ignored.
        acf_kwargs: Optional keyword arguments forwarded to ``plotacf``.

    Returns:
        The ``axs`` object that was passed in.
    """
    # None sentinels avoid sharing one mutable default dict across calls.
    summary_kwargs = {} if summary_kwargs is None else summary_kwargs
    acf_kwargs = {} if acf_kwargs is None else acf_kwargs

    summary = pm.summary(trace, **summary_kwargs)
    summary['mode'] = list(utils.modes(trace).values())

    plotacf(axs[0], lc, **acf_kwargs)

    axs[1].xaxis.set_visible(False)
    axs[1].yaxis.set_visible(False)
    table(axs[1], summary.round(3))

    pl.tight_layout()
    return axs
def _predict_scores_fixed(self, X, **kwargs):
    """Score ``X`` with a linear model built from posterior means.

    Reads the ``mean`` column of ``pm.summary(self.trace_)``, collects the
    entries ``weights[0] .. weights[n-1]`` (``n = self.n_object_features_fit_``)
    and adds the ``intercept`` entry when the summary contains one.

    Returns:
        ``np.dot(X, weights) + intercept``.
    """
    posterior_means = dict(pm.summary(self.trace_)["mean"])

    weight_keys = [
        "weights[{}]".format(idx)
        for idx in range(self.n_object_features_fit_)
    ]
    weights = np.array([posterior_means[key] for key in weight_keys])

    bias = 0.0
    if "intercept" in posterior_means:
        bias += posterior_means["intercept"]

    linear_part = np.dot(X, weights)
    return linear_part + bias
def _predict_scores_fixed(self, X, **kwargs):
    """Score ``X`` with a linear model built from posterior means.

    Reads the ``mean`` column of ``pm.summary(self.trace)``, collects the
    entries ``weights__0 .. weights__{n-1}`` (``n = self.n_object_features``)
    and adds the ``intercept`` entry when the summary contains one.

    Returns:
        ``np.dot(X, weights) + intercept``.
    """
    posterior_means = dict(pm.summary(self.trace)['mean'])

    weight_keys = [
        'weights__{}'.format(idx) for idx in range(self.n_object_features)
    ]
    weights = np.array([posterior_means[key] for key in weight_keys])

    bias = 0.0
    if 'intercept' in posterior_means:
        bias += posterior_means['intercept']

    linear_part = np.dot(X, weights)
    return linear_part + bias
def test_save_and_load_work_correctly(self):
    """Fit, save and reload the sparse GP regressor and compare both copies.

    The reloaded model must carry the same bookkeeping attributes, an equal
    trace summary, and a score that matches to one decimal place.
    """
    print('')
    original = self.test_SGPR
    original.fit(self.X_train, self.y_train)
    score1 = original.score(self.X_test, self.y_test)
    original.save(self.test_dir)

    SGPR2 = SparseGaussianProcessRegressor()
    SGPR2.load(self.test_dir)

    for attribute in ('inference_type', 'num_pred', 'num_training_samples'):
        self.assertEqual(getattr(self.test_SGPR, attribute),
                         getattr(SGPR2, attribute))

    summary_before = summary(self.test_SGPR.trace)
    summary_after = summary(SGPR2.trace)
    pd.testing.assert_frame_equal(summary_before, summary_after)

    score2 = SGPR2.score(self.X_test, self.y_test)
    self.assertAlmostEqual(score1, score2, 1)
def _predict_scores_fixed(self, X, **kwargs):
    """Predict probabilities from the posterior-mean mixture weights.

    A ``(n_object_features, n_mixtures)`` weight matrix is filled from the
    ``weights__{i}_{k}`` entries of the ``mean`` column of
    ``pm.summary(self.trace)``. The utilities ``np.dot(X, weights)`` go
    through a softmax over axis 1 and are then averaged over axis 2.

    NOTE(review): the mean over axis 2 presumably averages over the mixture
    components, which requires ``X`` to have at least three dimensions -
    confirm against callers.
    """
    posterior_means = dict(pm.summary(self.trace)['mean'])

    n_features = self.n_object_features
    n_mixtures = self.n_mixtures
    weights = np.zeros((n_features, n_mixtures))
    for row in range(n_features):
        for col in range(n_mixtures):
            key = 'weights__{}_{}'.format(row, col)
            weights[row, col] = posterior_means[key]

    utility = np.dot(X, weights)
    choice_probs = npu.softmax(utility, axis=1)
    return np.mean(choice_probs, axis=2)
def create_smry(trc, labels, vname=None):
    """Convenience function: build a trace summary for a forest plot.

    Args:
        trc: Trace passed to ``pm.summary``.
        labels: New index labels, paired positionally with the summary rows.
            Previously this argument was ignored and a module-level
            ``feature_labels`` was read instead.
        vname: Variable names to summarise; defaults to ``['w']``.

    Returns:
        The summary DataFrame with relabelled index and an extra ``ypos``
        column running evenly from 1 down to 0 (one value per row).
    """
    if vname is None:  # avoid a shared mutable default list
        vname = ['w']

    dfsm = pm.summary(trc, varnames=vname)
    dfsm.rename(index=dict(zip(dfsm.index, labels)), inplace=True)
    # Rows keep the order returned by pm.summary; sorting by 'mean' was
    # disabled in the original code.
    dfsm['ypos'] = np.linspace(1, 0, len(dfsm))
    return dfsm
def test_save_and_load_work_correctly(self):
    """Save and reload the hierarchical logistic regression and compare.

    The reloaded model must carry the same bookkeeping attributes, an equal
    trace summary, and predictions that match to one decimal place.
    """
    original = self.test_HLR
    probs1 = original.predict_proba(self.X_test, self.cat_test)
    original.save(self.test_dir)

    HLR2 = HierarchicalLogisticRegression()
    HLR2.load(self.test_dir)

    for attribute in ('num_cats', 'num_pred', 'num_training_samples'):
        self.assertEqual(getattr(self.test_HLR, attribute),
                         getattr(HLR2, attribute))

    summary_before = summary(self.test_HLR.trace)
    summary_after = summary(HLR2.trace)
    pd.testing.assert_frame_equal(summary_before, summary_after)

    probs2 = HLR2.predict_proba(self.X_test, self.cat_test)
    np.testing.assert_almost_equal(probs2, probs1, decimal=1)
def cornerplot(lc, trace, catalog, **kwargs):
    """Draw a corner plot of the trace, marking the posterior means.

    Args:
        lc: Object whose ``id`` attribute is used in the figure annotation.
        trace: Trace converted with ``pm.trace_to_dataframe`` for plotting.
        catalog: Value shown before ``lc.id`` in the annotation.
        **kwargs: Forwarded to ``corner.corner``.

    Returns:
        Whatever ``corner.corner`` returns.
    """
    posterior_means = pm.summary(trace)['mean']
    samples = pm.trace_to_dataframe(trace)
    figure = corner.corner(samples, truths=posterior_means, **kwargs)

    label = "{0} {1}".format(catalog, lc.id)
    pl.annotate(label, xy=(0.4, 0.95), xycoords="figure fraction",
                fontsize=30)
    return figure
def test_save_and_load_work_correctly(self):
    """Fit, save and reload the linear regression and compare both copies.

    The reloaded model must carry the same bookkeeping attributes, an equal
    trace summary, and a score that matches to one decimal place.
    """
    # Local import so the block does not depend on a module-level `pd`.
    import pandas as pd

    print('')
    self.test_LR.fit(self.X_train, self.Y_train)
    score1 = self.test_LR.score(self.X_test, self.Y_test)
    self.test_LR.save(self.test_dir)

    LR2 = LinearRegression()
    LR2.load(self.test_dir)

    self.assertEqual(self.test_LR.inference_type, LR2.inference_type)
    self.assertEqual(self.test_LR.num_pred, LR2.num_pred)
    self.assertEqual(self.test_LR.num_training_samples,
                     LR2.num_training_samples)
    # assertEqual on two DataFrames evaluates `df1 == df2` in a boolean
    # context, which raises "truth value of a DataFrame is ambiguous".
    # Compare the frames the same way the sibling model tests do.
    pd.testing.assert_frame_equal(summary(self.test_LR.trace),
                                  summary(LR2.trace))

    score2 = LR2.score(self.X_test, self.Y_test)
    np.testing.assert_almost_equal(score1, score2, decimal=1)
def solve_vi(X, Y, initial=None, batch_size=100):
    """Fit a marginal GP with ADVI and return posterior-mean parameters.

    The model has half-normal priors on the kernel amplitude ``sigma_K`` and
    length scale ``l_space`` (scaled by the ranges of ``Y`` and ``X``), a
    uniform prior on the noise ``eps`` in ``[0, std(Y)]``, and a 2-D
    squared-exponential covariance.

    Args:
        X: Inputs, wrapped in a shared variable.
        Y: Targets, wrapped in a shared variable.
        initial: Optional starting point for ADVI; when falsy, the MAP
            estimate is computed and used.
        batch_size: Currently unused (minibatching is disabled).

    Returns:
        dict mapping each row label of the summary to its posterior mean,
        computed from 10000 draws of the fitted approximation.
    """
    X_t = th.shared(X)
    Y_t = th.shared(Y)
    dx = np.max(X) - np.min(X)
    dy = np.max(Y) - np.min(Y)

    with pm.Model() as model:
        sigma_K = pm.HalfNormal('sigma_K', sd=dy / 3.)
        l_space = pm.HalfNormal('l_space', sd=dx / 3., testval=1.)
        cov_func = sigma_K**2 * pm.gp.cov.ExpQuad(
            2, active_dims=[0, 1], ls=l_space)
        gp = pm.gp.Marginal(cov_func=cov_func)
        eps = pm.Uniform('eps', 0.0, np.std(Y))
        gp.marginal_likelihood('y1', X_t, Y_t, eps)

        initial = initial or pm.find_MAP()
        approx = pm.fit(
            1000,
            start=initial,
            method='advi',
            callbacks=[
                pm.callbacks.CheckParametersConvergence(tolerance=1e-4)
            ])

        draws = approx.sample(10000)
        # Summarise once; the original rebuilt the full summary table for
        # every key of the result dictionary.
        posterior_means = pm.summary(draws)['mean']

    return {k: posterior_means[k] for k in posterior_means.keys()}
def print_summary(self, save_file = None):
    """Print the summary of ``self.trace`` and optionally save it as a figure.

    Args:
        save_file: When not ``None``, the summary table is drawn on a
            frameless subplot with hidden axes and written to this path via
            ``plt.savefig``.
    """
    trace_summary = pm.summary(self.trace)
    print(trace_summary)

    if save_file is None:
        return

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)  # only the table itself should be visible
    table(ax, trace_summary, loc='upper right')
    plt.savefig(save_file)
def plot_traces(traces, retain=1000):
    """Trace-plot the last ``retain`` samples and annotate posterior means.

    Each variable's mean is drawn as a reference line in the trace plot and
    written as a rotated label on the left-hand panel of its row.

    Args:
        traces: Trace object supporting slicing and ``varnames``.
        retain: Number of trailing samples to keep.
    """
    # Slice and summarise once; the original did both twice.
    recent = traces[-retain:]
    summary_df = pm.summary(recent)

    ax = pm.traceplot(
        recent,
        figsize=(12, len(traces.varnames) * 1.5),
        lines={k: v['mean'] for k, v in summary_df.iterrows()})

    for i, mn in enumerate(summary_df['mean']):
        ax[i, 0].annotate('{:.2f}'.format(mn),
                          xy=(mn, 0),
                          xycoords='data',
                          xytext=(5, 10),
                          textcoords='offset points',
                          rotation=90,
                          va='bottom',
                          fontsize='large',
                          color='#AA0022')
def model_ggl(locations, samples, centers, cc):
    """Fit the widths of a separable three-factor model and print its error.

    The expected value is ``cc * gpdf(x) * gpdf(y) * lpdf(theta)`` with the
    centres fixed to ``centers`` and the widths ``s1``, ``s2``, ``s3``
    sampled. After sampling, the posterior-mean widths are plugged back in
    and the sum of squared residuals against ``samples`` is printed.
    """
    basic_model = pm.Model()
    with basic_model:
        # Priors for the unknown widths; the centres are fixed inputs.
        s1 = pm.HalfNormal('s1', sd=20)
        # NOTE(review): s2 is unconstrained (Normal) while s1 and s3 are
        # HalfNormal - looks like it is used as a width too; confirm.
        s2 = pm.Normal('s2', sd=20)
        s3 = pm.HalfNormal('s3', sd=20)
        m1, m2, m3 = centers[0], centers[1], centers[2]

        p_x = gpdf(locations[0], m1, s1)
        p_y = gpdf(locations[1], m2, s2)
        p_theta = lpdf(locations[2], m3, s3)
        sigma = pm.HalfNormal('sigma', sd=1)

        # Expected value of the outcome.
        mu = cc * p_x * p_y * p_theta
        # Likelihood (sampling distribution) of the observations.
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=samples)

        trace = pm.sample(5000, njobs=4)
        pm.summary(trace)

        # Re-evaluate the model at the posterior-mean widths.
        S1 = np.mean(trace['s1'])
        S2 = np.mean(trace['s2'])
        S3 = np.mean(trace['s3'])
        M1, M2, M3 = centers[0], centers[1], centers[2]

        fitted_x = gpdf(locations[0], M1, S1).eval()
        fitted_y = gpdf(locations[1], M2, S2).eval()
        fitted_theta = lpdf(locations[2], M3, S3).eval()
        fitted_mu = cc * fitted_x * fitted_y * fitted_theta

        Err = np.sum((samples - fitted_mu)**2)
        print(Err)
def main():
    """Profile the sampler and write the trace summary and timings to CSV.

    Writes ``pymc3_traces_summary.csv`` (output of ``pm.summary``) and
    ``pymc3_results.csv`` (one ``timing`` column) under ``results/tables``.
    """
    X, y = get_data()
    times, traces = profiler(X, y, max_iters=10)

    traces_summary = pm.summary(traces)
    traces_summary.to_csv('results//tables//pymc3_traces_summary.csv')

    # `columns` must be list-like; a bare string is rejected by pandas.
    timings = pd.DataFrame(times, columns=['timing'])
    timings.to_csv('results//tables//pymc3_results.csv', index=False)
    return None
def test_save_and_load_work_correctly(self):
    """Fit, save and reload the Student's t process regressor and compare.

    The reloaded model must carry the same bookkeeping attributes, an equal
    trace summary, and a score that matches to zero decimal places.
    """
    print('')
    original = self.advi_stpr
    original.fit(self.X_train, self.y_train, inference_args={"n": 25000})
    score1 = original.score(self.X_test, self.y_test)
    original.save(self.test_dir)

    stpr2 = StudentsTProcessRegressor()
    stpr2.load(self.test_dir)

    for attribute in ('inference_type', 'num_pred', 'num_training_samples'):
        npt.assert_equal(getattr(self.advi_stpr, attribute),
                         getattr(stpr2, attribute))

    summary_before = summary(self.advi_stpr.trace)
    summary_after = summary(stpr2.trace)
    pdt.assert_frame_equal(summary_before, summary_after)

    score2 = stpr2.score(self.X_test, self.y_test)
    npt.assert_almost_equal(score1, score2, 0)
def apply(input):
    """Run the simulation for ``input`` and return the trace summary as JSON.

    The trace is also written via ``write_output`` to a URI built from the
    fixed prefix and ``input["target_output"]``.
    """
    client = Algorithmia.client()
    frame = parse_dataframe(input, client)
    trace = run_simulation(frame)

    # For now the trace is saved to a data file and only the summary is
    # returned to the caller.
    target = input["target_output"]
    output_file_uri = "s3+fantasygm://fantasygm-trace-out/v1/" + target

    # TODO: need a list of varnames for converting the multitrace to dataframe
    write_output(trace, output_file_uri, input["target_output"], client)

    summary_table = pm.summary(trace)
    return summary_table.to_json()
def bms(L, **sample_kwargs):
    """Compute exceedance probabilities and expected relative frequencies.

    This function computes the exceedance probabilities (xp) and expected
    relative frequencies (r) from an array of log-evidences.

    Args:
        L (numpy.ndarray): Array of model log-evidences (higher is better
            fit). Array shape should be (K models; N subjects).
        **sample_kwargs: Additional arguments to the pymc.sample function.
            Currently `cores=1` seems to be necessary.

    Returns:
        dict: Dictionary with keys ``summary`` (summary table of ``alpha``
        and ``r``), ``xp`` and ``r``.

    Reference:
        Stephan, K. E., Penny, W. D., Daunizeau, J., Moran, R. J., & Friston,
        K. J. (2009). Bayesian model selection for group studies.
        Neuroimage, 46(4), 1004-1017.
    """
    K, N = L.shape

    # The model is bound to `model`, not `bms`, so the function name is not
    # shadowed inside its own body.
    with pm.Model() as model:

        def lookup_L(L, N):
            """Look up the log-evidences for all N subjects, given the
            current model labels m (bound below, before sampling starts).
            """
            return L[tt.cast(m, dtype="int32"),
                     tt.cast(tt.arange(N), dtype="int32")]

        # Priors
        alpha = pm.Uniform("alpha", 0, N, shape=K, testval=np.ones(K))

        # Model
        r = pm.Dirichlet("r", a=alpha, testval=np.ones(K) / K)
        m = pm.Categorical("m", p=r, shape=N, testval=0)

        # Look up log evidence
        pm.DensityDist("ll", logp=lookup_L, observed=dict(L=L, N=N))

        # Sample
        trace = pm.sample(**sample_kwargs)

        summary_table = pm.summary(trace, var_names=["alpha", "r"])

    # Fetch the samples of r once instead of re-reading them for every model.
    r_samples = trace.get_values("r")
    row_max = r_samples.max(axis=1)

    result = {}
    result["summary"] = summary_table
    # xp[k]: fraction of draws in which model k has the largest frequency.
    result["xp"] = np.array(
        [np.mean(r_samples[:, k] == row_max) for k in range(K)])
    r_unscaled = np.array([np.mean(r_samples[:, k]) for k in range(K)])
    result["r"] = r_unscaled / r_unscaled.sum()

    return result
def __init__(self,X_train,y_train,n_hidden,lam=1):
    """Build and sample a Bayesian RBF network, then dump and plot the trace.

    Each training input contributes ``w . [exp(-lam * (C - x)**2), 1]`` as the
    mean of a tight Normal likelihood. Sampling runs NUTS twice (2000 draws
    scaled by the MAP point, then 20000 draws scaled and started from the
    last sample). Every variable is written to ``<name>.txt``.
    """
    n_train = y_train.shape[0]
    n_dim = X_train.shape[1]
    print(X_train.shape)

    with pm.Model() as rbfnn:
        C = pm.Normal('C',mu=0,sd=10,shape=(n_hidden))
        w = pm.Normal('w',mu=0,sd=10,shape=(n_hidden+1))

        y_out = []
        for x in X_train:
            # 1-d shortcut: no sum over input dimensions is taken.
            rbf_out = T.exp(-lam*(C-x)**2)
            # Append a constant 1 so the last weight acts as a bias.
            rbf_out_biased = T.concatenate([rbf_out, T.alloc(1,1)], 0)
            y_out.append(T.dot(w,rbf_out_biased))

        y = pm.Normal('y',mu=y_out,sd=0.01,observed=y_train)

        start = pm.find_MAP(fmin=scipy.optimize.fmin_l_bfgs_b)
        print(start)

        warmup_step = pm.NUTS(scaling=start)
        trace = pm.sample(2000, warmup_step, progressbar=False)

        main_step = pm.NUTS(scaling=trace[-1])
        trace = pm.sample(20000,main_step,start=trace[-1])

        print(summary(trace, vars=['C', 'w']))

    for name in trace.varnames:
        for chain_values in trace.get_values(name, combine=False, squeeze=False):
            chain_values = np.squeeze(chain_values)
            # NOTE(review): the file is reopened in "w+" mode for every
            # chain, so only the last chain's samples remain - confirm intent.
            with open(str(name)+".txt","w+") as thefile:
                for item in chain_values:
                    thefile.write("%s\n" % (item,))

    traceplot(trace)
    plt.show()
def excel_posterior(trace, filename):
    """Export the posterior summary and the raw trace to Excel workbooks.

    The project file is read again to rebuild the variable names: one risk
    name per non-zero entry in columns 1-2 of each work package, six
    per-work-package names, and five project-level indices. The summary goes
    to ``<filename>Output.xlsx``, the trace to ``<filename>Trace.xlsx``; a
    posterior plot of the same variables is also drawn.
    """
    # The data must be read again to recover the activity count and names.
    prj = project_reader(filename)
    project_definition = prj[1]
    WP_NAMES = np.array(project_definition[:, 0])
    wp_count = project_definition[:, 0].shape[0]

    index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"]

    # A work package contributes a risk name for each non-zero risk column.
    risk_names = [
        project_definition[row][0] + "_Risk_%d" % (col + 1)
        for row in range(wp_count)
        for col in range(2)
        if project_definition[row][col + 1] != 0
    ]

    # Grouped by kind: all PV names first, then all partial PV names, etc.
    templates = ("PV_%s", "Partial_PV_%s", "EV_%s",
                 "COMPLETION_%s", "SPI_%s", "CPI_%s")
    per_package_names = [
        template % WP_NAMES[row]
        for template in templates
        for row in range(wp_count)
    ]

    all_names = risk_names + per_package_names + index_names

    outputName = filename + "Output.xlsx"
    traceName = filename + "Trace.xlsx"

    summary_table = pm.summary(
        trace, varnames=all_names,
        stat_funcs=[trace_mean, trace_sd, trace_quantiles])
    summary_table.to_excel(outputName, sheet_name="Summary")

    pm.plot_posterior(trace, varnames=all_names)

    pm.trace_to_dataframe(trace).to_excel(traceName, sheet_name="Trace")
def _predict_scores_fixed(self, X, **kwargs):
    """Predict probabilities from posterior-mean weights and nest parameters.

    ``weights[i]`` (``i < self.n_object_features``) and ``lambda_k[i]``
    (``i < self.n_nests``) are read from the ``mean`` column of
    ``pm.summary(self.trace)``; the utilities ``np.dot(X, weights)`` are then
    turned into probabilities by ``self._get_probabilities_np``.
    """
    mean_trace = dict(pm.summary(self.trace)["mean"])

    weight_values = []
    for idx in range(self.n_object_features):
        weight_values.append(mean_trace["weights[{}]".format(idx)])
    weights = np.array(weight_values)

    lambda_values = []
    for idx in range(self.n_nests):
        lambda_values.append(mean_trace["lambda_k[{}]".format(idx)])
    lambda_k = np.array(lambda_values)

    utility = np.dot(X, weights)
    return self._get_probabilities_np(utility, lambda_k)
def run(self):
    """Fit a product-embedding demand model with minibatch ADVI and save it.

    Training CSVs for ``self.rand_round`` are read with dask, a PyMC3 model
    with per-product category weights is fitted by minibatch ADVI, and the
    mean of the last 100 sampled ``cat`` values is written as msgpack to the
    task output path.
    """
    coloredlogs.install()
    logging.info('Fetching some data')
    with dask.set_options(get=dask.multiprocessing.get):
        csv_glob = '/tmp/split_data/{}/train/*.csv'.format(self.rand_round)
        data = dask.dataframe.read_csv(csv_glob)
        total_size = data.week_num.count().compute()
        nose.tools.assert_greater(total_size, 100, 'Not enought data!')
        unique_products = data['product_id'].unique().compute().astype(
            np.uint16)
        # Only the head of the data seeds the shared variables.
        sample = data.head()
        logging.info('Got it!')

        product_codes = sample.product_id.astype(
            'category', categories=unique_products).cat.codes.values
        product_id_var = theano.shared(value=product_codes,
                                       name='product_id_var')
        adjusted_demand_var = theano.shared(
            value=sample.adjusted_demand.values, name='adjusted_demand_var')

        model = pm.Model()
        with model:
            n_products = unique_products.shape[0]
            product_category = pm.Uniform('cat', 0, 1, shape=(n_products, 5))
            product_vecs = pm.Normal('vecs', 0, 100, shape=5)
            adjusted_demand_variance = pm.HalfNormal('demand_variance', 10)

            product_pred = T.dot(product_category[product_id_var],
                                 product_vecs)
            adjusted_demand = pm.Normal('adjusted_demand',
                                        product_pred,
                                        adjusted_demand_variance,
                                        observed=adjusted_demand_var)

            minibatches = map(self.expand_batch,
                              self.minibatches(unique_products))

            v_params = pm.variational.advi_minibatch(
                n=100,
                minibatch_tensors=[product_id_var, adjusted_demand_var],
                minibatch_RVs=[adjusted_demand],
                minibatches=minibatches,
                total_size=total_size,
                n_mcsamples=5,
                verbose=True)
            trace = pm.variational.sample_vp(v_params, draws=500)
            print(pm.summary(trace))

            res = trace[-100:]['cat'].mean(0)

            target = self.output()
            target.makedirs()
            frame = pandas.DataFrame(res, index=unique_products.values)
            frame.to_msgpack(self.output().path)
def run_mcmc(self, spec_method='flexible'):
    """Specify a two-predictor linear regression, sample it and show results.

    Args:
        spec_method: ``'flexible'`` builds priors and likelihood explicitly;
            ``'patsy_glm'`` builds the model from the formula
            ``'y ~ x1 + x2'``.

    Raises:
        ValueError: If ``spec_method`` is neither of the two options.
    """
    X = self.dataset['X']
    with pm.Model() as mdl:
        if spec_method == 'patsy_glm':
            data_dict = {
                'y': self.dataset['y'],
                'x1': X[:, 0],
                'x2': X[:, 1],
            }
            self.logger.info('specifying model using patsy glm method')
            pm.glm.GLM.from_formula('y ~ x1 + x2', data_dict)
        elif spec_method == 'flexible':
            # Priors.
            self.logger.info('specifying priors')
            intercept = pm.Normal('intercept', mu=0., sd=1000.)
            x1_coef = pm.Normal('x1_coef', mu=0., sd=1000.)
            x2_coef = pm.Normal('x2_coef', mu=0., sd=1000.)
            residual_std = pm.Gamma('residual_std',
                                    mu=1., sd=1000., testval=1.)

            # Likelihood.
            self.logger.info('specifying likelihood')
            mu = intercept + x1_coef * X[:, 0] + x2_coef * X[:, 1]
            likelihood = pm.Normal('y', mu=mu, sd=residual_std,
                                   observed=self.dataset['y'])
        else:
            raise ValueError(
                'unrecognised spec_method {}'.format(spec_method))

        # Sampler is chosen automatically; 'tune' handles the burn-in.
        self.logger.info('running mcmc')
        trace = pm.sample(6000, njobs=1, tune=1000)

        # Show results without thinning; burn-in was already handled above.
        n_burnin_samples = 0
        rounded_summary = pm.summary(trace, start=n_burnin_samples).round(2)
        msg = 'summary of marginal posteriors (no thinning):\n{}'.format(
            rounded_summary)
        self.logger.info(msg)

        pm.traceplot(trace, skip_first=n_burnin_samples)
        plt.show()

        plotted_params = ['intercept', 'x1_coef', 'x2_coef', 'residual_std']
        self._show_custom_plots(trace=trace,
                                params=plotted_params,
                                burnin=n_burnin_samples)
def bayesTest(mocktable, outname):
    """Estimate cell-type mixing proportions for three samples.

    The tab-separated table must have a header line starting with ``Gene``
    and the columns ``Neurons``, ``Astrocytes``, ``Oligodendrocytes``,
    ``Sample1``, ``Sample2`` and ``Sample3``. For each sample a Dirichlet
    mixture of the three reference profiles is fitted, and the mean, median
    and most common value of the sampled neuron proportion are printed.

    Args:
        mocktable: Path to the expression table.
        outname: Currently unused.

    Raises:
        KeyError: If one of the required columns is missing from the table.
    """
    import pymc3 as pymc
    from collections import Counter

    # Column index of every non-'Gene' header, and the values per column.
    idx = {}
    expr_vector = {}
    with open(mocktable) as table_file:  # closed even if parsing fails
        for line in table_file:
            fields = line.strip().split('\t')
            if line.startswith('Gene'):
                for i, column_name in enumerate(fields):
                    if column_name != 'Gene':
                        idx[column_name] = i
            else:
                for sample, column in idx.items():
                    expr_vector.setdefault(sample, []).append(
                        float(fields[column]))

    neuro = expr_vector['Neurons']
    astro = expr_vector['Astrocytes']
    oligo = expr_vector['Oligodendrocytes']
    samples = [expr_vector['Sample1'],
               expr_vector['Sample2'],
               expr_vector['Sample3']]

    for observed in samples:
        with pymc.Model():
            beta = pymc.Dirichlet('beta', a=np.array([1.0, 1.0, 1.0]))
            sigma = pymc.HalfNormal('sigma', sd=1)
            y_est = beta[0] * neuro + beta[1] * astro + beta[2] * oligo
            pymc.Normal('y', mu=y_est, sd=sigma, observed=observed)
            trace = pymc.sample(1000, random_seed=123, progressbar=True)
            pymc.summary(trace)

        # trace['beta'] has one column per cell type; column 0 is neurons.
        neurons = trace['beta'][:, 0]
        n_avg = np.mean(neurons)
        n_med = np.median(neurons)
        n_mode = Counter(neurons).most_common(1)[0][0]
        # Single pre-formatted string: prints the same on Python 2 and 3.
        print("%s %s %s" % (n_avg, n_med, n_mode))
def compare_parameters_individual(model, parameters, comparisons=None):
    """Tabulate per-subject posterior differences between conditions.

    For every parameter and every ``(condition_a, condition_b)`` comparison,
    one row per subject is produced. When the parameter depends on a
    condition and both conditions are in its design, the row holds the mean,
    the 95% HPD bounds and P(difference > 0) of the posterior difference;
    otherwise a ``warning`` column explains why nothing was computed.

    Args:
        model: Object exposing ``data``, ``design`` and per-subject ``trace``.
        parameters: Iterable of parameter names.
        comparisons: Iterable of condition pairs; ``None`` means none.

    Returns:
        DataFrame sorted by ``subject`` with a fresh index. When there is
        nothing to compare, an empty frame with the three identifying
        columns is returned (the original failed inside ``pd.concat``).
    """
    if comparisons is None:
        comparisons = []

    subjects = model.data['subject'].unique().astype(int)

    # The original also built summary(trace) for every subject here and never
    # used the result; that work has been dropped.
    comparison_df = []

    for parameter in parameters:
        for comparison in comparisons:
            comparison_string = '{}-{}'.format(*comparison)
            df_pc = pd.DataFrame(dict(subject=subjects,
                                      parameter=parameter,
                                      comparison=comparison_string),
                                 index=subjects)

            if model.design[parameter]['dependence'] is None:
                df_pc['warning'] = 'Parameter has no dependencies.'
            else:
                conditions = model.design[parameter]['conditions']
                if comparison[0] in conditions and comparison[1] in conditions:
                    # Posterior difference per subject.
                    differences = np.array([
                        (model.trace[i].get_values(
                            parameter + '_' + comparison[0])
                         - model.trace[i].get_values(
                             parameter + '_' + comparison[1]))
                        for i in subjects
                    ])[:, :, 0, 0]

                    hpdlower, hpdupper = hpd(differences.T, alpha=0.05).T
                    df_pc['mean'] = np.mean(differences, axis=1)
                    df_pc['hpd_2.5'] = hpdlower
                    df_pc['hpd_97.5'] = hpdupper
                    df_pc['p>0'] = np.mean(differences > 0, axis=1)
                else:
                    df_pc['warning'] = 'At least one condition is missing.'

            comparison_df.append(df_pc)

    if not comparison_df:
        # pd.concat raises on an empty list.
        return pd.DataFrame(columns=['subject', 'parameter', 'comparison'])

    return (pd.concat(comparison_df, sort=False)
            .sort_values('subject')
            .reset_index(drop=True))
def main(StartYear, EndYear, n_draw, model):
    """Sample the chosen model on the training data and pickle the trace.

    Args:
        StartYear: Passed to ``ReadData``.
        EndYear: Passed to ``ReadData``.
        n_draw: Number of draws for ``pm.sample``.
        model: ``'exponential'`` or ``'hidden_vol'``.

    Raises:
        NotImplementedError: For any other ``model`` value.
    """
    training_data_df = ReadData(StartYear, EndYear).train

    if model == 'exponential':
        build_model = exponential_model
    elif model == 'hidden_vol':
        build_model = hidden_vol_model
    else:
        raise NotImplementedError
    model_obj = build_model(training_data_df)

    n_cpus = multiprocessing.cpu_count()
    banner = '[INFO {}] starts sampling on {} CPUs.'.format(now(), n_cpus)
    print(banner)

    with model_obj:
        trace = pm.sample(draws=n_draw, njobs=n_cpus)
        pm.summary(trace)

    # One trace file per model type.
    output_file = '{}_model_trace.pkl'.format(model)
    with open(output_file, 'wb') as output_file_obj:
        pickle.dump(trace, output_file_obj)
def run():
    """Simulate sneeze counts and fit a Poisson regression with interaction.

    Four groups of 1000 Poisson draws (alcohol yes/no x medication yes/no)
    are generated with a fixed seed, plotted, turned into design matrices
    with patsy, and fitted with a log-link Poisson model. The exponentiated
    posterior means and HPD bounds are printed.
    """
    plt.rcParams['figure.figsize'] = 14, 6
    np.random.seed(0)
    print('Running on PyMC3 v{}'.format(pm.__version__))

    # Poisson rates for the four groups.
    theta_noalcohol_meds = 1    # no alcohol, took an antihist
    theta_alcohol_meds = 3      # alcohol, took an antihist
    theta_noalcohol_nomeds = 6  # no alcohol, no antihist
    theta_alcohol_nomeds = 36   # alcohol, no antihist

    # Draw the groups in this fixed order so the seeded stream is unchanged.
    q = 1000
    group_rates = (theta_noalcohol_meds, theta_alcohol_meds,
                   theta_noalcohol_nomeds, theta_alcohol_nomeds)
    nsneeze = np.concatenate(
        [np.random.poisson(rate, q) for rate in group_rates])
    alcohol = np.tile(np.repeat([False, True], q), 2)
    nomeds = np.repeat([False, True], 2 * q)
    df = pd.DataFrame({
        'nsneeze': nsneeze,
        'alcohol': alcohol,
        'nomeds': nomeds})

    g = sns.catplot(x='nsneeze', row='nomeds', col='alcohol', data=df,
                    kind='count', size=4, aspect=1.5)

    # The full formulation 'nsneeze ~ alcohol + antihist + alcohol:antihist'
    # is immediately replaced by the shorthand below.
    fml = 'nsneeze ~ alcohol + antihist + alcohol:antihist'
    fml = 'nsneeze ~ alcohol * nomeds'

    mx_en, mx_ex = pt.dmatrices(fml, df, return_type='dataframe',
                                NA_action='raise')
    # Preview of the design matrix; the result is not used.
    pd.concat((mx_ex.head(3), mx_ex.tail(3)))

    with pm.Model() as mdl_fish:
        # Weakly informative Normal priors.
        b0 = pm.Normal('b0_intercept', mu=0, sigma=10)
        b1 = pm.Normal('b1_alcohol[T.True]', mu=0, sigma=10)
        b2 = pm.Normal('b2_nomeds[T.True]', mu=0, sigma=10)
        b3 = pm.Normal('b3_alcohol[T.True]:nomeds[T.True]', mu=0, sigma=10)

        # Linear predictor; the exp link is applied in the likelihood.
        alcohol_term = b1 * mx_ex['alcohol[T.True]']
        nomeds_term = b2 * mx_ex['nomeds[T.True]']
        interaction_term = b3 * mx_ex['alcohol[T.True]:nomeds[T.True]']
        theta = b0 + alcohol_term + nomeds_term + interaction_term

        # Poisson likelihood.
        y = pm.Poisson('y', mu=np.exp(theta),
                       observed=mx_en['nsneeze'].values)

        trc_fish = pm.sample(1000, tune=1000, cores=4)

        rvs_fish = [rv.name
                    for rv in strip_derived_rvs(mdl_fish.unobserved_RVs)]
        plot_traces_pymc(trc_fish, varnames=rvs_fish)

        columns = ['mean','hpd_2.5','hpd_97.5']
        print(np.exp(pm.summary(trc_fish, varnames=rvs_fish)[columns]))
        plt.show()
def summary(trace, **kwargs):
    """Return the PyMC3 trace summary extended with a posterior ``mode``.

    :param trace: PyMC3 trace object
    :param kwargs: keyword args forwarded to the PyMC3 summary function
    :returns: PyMC3 trace summary in a pandas DataFrame
    """
    # One extra statistic, appended to the defaults because extend=True.
    extra_stats = [
        lambda samples: pd.Series(posterior_mode(samples), name='mode')
    ]
    return pm.summary(trace, extend=True, stat_funcs=extra_stats, **kwargs)
def estimate_statistic_mcmc(data):
    """Fit a Normal model to ``data`` by MCMC and return the trace summary.

    Priors: ``mu ~ Normal(0, 5)`` and ``std ~ Normal(1, 3)``; 1000 draws are
    sampled.
    """
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=0, sd=5)
        # NOTE(review): `std` is used as the likelihood's sd but its Normal
        # prior allows negative values - confirm this is intended.
        std = pm.Normal('std', mu=1, sd=3)
        pm.Normal('obs', mu=mu, sd=std, observed=data)
        trace = pm.sample(1000)
        return pm.summary(trace)
def sample(self, draws=1000, tune=1000, chains=4, **kwargs):
    """Sample ``self.model`` starting from its MAP estimate.

    The trace is stored on ``self.trace``.

    Args:
        draws: Number of draws per chain.
        tune: Number of tuning steps.
        chains: Number of chains.
        **kwargs: Forwarded to ``pm.sample``.

    Returns:
        Summary of the variables ``period``, ``lighttime``, ``tref``,
        ``varpi`` and ``eccen``.
    """
    reported = ["period", "lighttime", "tref", "varpi", "eccen"]
    with self.model as model:
        map_params = pm.find_MAP()
        sampler_options = dict(draws=draws, tune=tune, chains=chains,
                               start=map_params)
        self.trace = pm.sample(**sampler_options, **kwargs)
        return pm.summary(self.trace, varnames=reported)
def test_save_and_load_work_correctly(self):
    """Fit, save and reload the HLM model, then compare it with the original.

    The reloaded model must report the same ``num_cats`` / ``num_pred``, an
    equal trace summary, identical variational means, and predictions that
    match to one decimal place.
    """
    print("")
    fitted = self.test_HLM
    fitted.fit(self.X_train, self.cat_train, self.Y_train)
    probs1 = fitted.predict_proba(self.X_test, self.cat_test)
    # Second prediction on the unsaved model; its result is not checked.
    probs2 = fitted.predict_proba(self.X_test, self.cat_test)
    fitted.save(self.test_dir)

    HLM2 = HLM()
    HLM2.load(self.test_dir)

    for attribute in ("num_cats", "num_pred"):
        self.assertEqual(getattr(self.test_HLM, attribute),
                         getattr(HLM2, attribute))
    # NOTE(review): if `summary` returns a pandas DataFrame in the PyMC3
    # version used here, assertEqual on two DataFrames raises "truth value is
    # ambiguous"; pd.testing.assert_frame_equal would be needed - confirm.
    self.assertEqual(summary(self.test_HLM.advi_trace),
                     summary(HLM2.advi_trace))

    original_means = self.test_HLM.v_params.means
    for key in original_means.keys():
        np.testing.assert_equal(self.test_HLM.v_params.means[key],
                                HLM2.v_params.means[key])

    probs3 = HLM2.predict_proba(self.X_test, self.cat_test)
    np.testing.assert_almost_equal(probs3, probs1, decimal=1)
def run(n=1500):
    """Sample model ``m`` and print observed data, true and estimated ranking.

    Args:
        n: Number of draws, or the string ``'short'`` for a 50-draw run.
    """
    draws = 50 if n == 'short' else n

    with m:
        trace = pm.sample(draws)
        pm.traceplot(trace, varnames=['mu_hat'])

        print('Example observed data: ')
        print(y[:30, :].T)
        print('The true ranking is: ')
        print(yreal.flatten())

        print('The Latent mean is: ')
        mu_hat_means = pm.summary(trace, varnames=['mu_hat'])['mean'].values
        # A leading 0 is prepended before the estimated means.
        latentmu = np.hstack(([0], mu_hat_means))
        print(np.round(latentmu, 2))

        print('The estimated ranking is: ')
        print(np.argsort(latentmu))
log_like2 = - 0.5 * n * tt.log(2 * np.pi) \ - 0.5 * tt.log(dsigma) \ - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2) return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2)) with pm.Model() as ATMIP_test: X = pm.Uniform('X', shape=n, lower=-2. * np.ones_like(mu1), upper=2. * np.ones_like(mu1), testval=-1. * np.ones_like(mu1), transform=None) like = pm.Deterministic('like', two_gaussians(X)) llk = pm.Potential('like', like) with ATMIP_test: step = atmcmc.ATMCMC(n_chains=n_chains, tune_interval=tune_interval, likelihood_name=ATMIP_test.deterministics[0].name) trcs = atmcmc.ATMIP_sample( n_steps=n_steps, step=step, njobs=njobs, progressbar=True, trace=test_folder, model=ATMIP_test) pm.summary(trcs) Pltr = pm.traceplot(trcs, combined=True) plt.show(Pltr[0][0])
trace = mc.sample(nsamples, step=step, start=start, njobs=self.njobs, trace=backend) return trace if __name__ == "__main__": def real_func(): x = np.linspace(0.01, 1.0, 10) f = x + np.random.randn(len(x))*0.01 return f def model_func(beta): x = np.linspace(0.01, 1.0, 10) f = beta return f data = real_func() tau_obs = np.eye(10)/.01**2 tau_prior = np.eye(10)/1.0**2 beta_prior = np.ones_like(data)*1.0 beta_map = np.linspace(0.01, 1.0, 10) + np.random.randn(10)*0.1 sampler = MCMCSampler(model_func, data, tau_obs, beta_prior, tau_prior, beta_map, is_cov=False, method=None) trace = sampler.sample(2000) mc.summary(trace) mc.traceplot(trace) plt.figure() plt.plot(beta_map, label='ACTUAL') plt.plot(np.mean(trace['beta'][:,:], axis=0), label='MCMC') plt.show()
} """ def get_garch_model(): r = np.array([28, 8, -3, 7, -1, 1, 18, 12], dtype=np.float64) sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float64) alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18], dtype=np.float64) shape = r.shape with Model() as garch: alpha1 = Uniform('alpha1', 0., 1., shape=shape) beta1 = Uniform('beta1', 0., 1 - alpha1, shape=shape) mu = Normal('mu', mu=0., sd=100., shape=shape) theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) + beta1 * tt.pow(sigma1, 2)) Normal('obs', mu, sd=theta, observed=r) return garch def run(n=1000): if n == "short": n = 50 with get_garch_model(): tr = sample(n, tune=1000) return tr if __name__ == '__main__': summary(run())
def runModel():
    """Simulate observations, build the hierarchical supernova model and sample it.

    The data come from ``simulateData()``.  One set of global nodes (cosmology,
    calibration, rates, luminosity populations) is created, followed by one
    group of nodes per transient.  The sampler is started at the MAP estimate
    (found with BFGS), 500 draws are stored in the SQLite backend
    ``trace.sqlite`` and ``summary`` is called on the resulting trace.

    Returns
    -------
    None
    """
    observation = simulateData()
    nTrans = len(observation['spectype'])

    # Create the pymc3 model and fill it with the distributions and parameters
    # of the model
    basic_model = Model()

    with basic_model:

        r"""
        Cosmology Node.  The FlatwCDM cosmology.

        pdf(Om0, w0)

        We need the flexibility to switch in and out different cosmological
        models.  The function that describes luminosity distance is specific
        to the model: the parameters and function should be packaged together.

        Parameters
        ----------
        Om0:    Omega_M
        w0:     constant equation of state w
        """

        Om0 = Lognormal('Om0', mu=numpy.log(0.28), tau=1/.1/.1)
        w0 = Normal('w0', mu=-1, sd=0.05)

        """
        Calibration Node.  Global zeropoints for each band.

        pdf(Z)

        The transmission function of the bands will be used later.  The
        transmission and zeropoints should be packaged together.  More
        complicated parameterizations of calibration are expected.

        Parameters
        -----------
        Z:  zeropoint (in mag) for the bands
        """

        n_bands = 1
        zeropoints = Normal('zeropoints', mu=0, sd=.02, shape=n_bands)

        """
        SN Ia Rate Node.

        rate_Ia_r = constant

        For SN cosmology the relative rates between different populations are
        sufficient.  Rates of all types are relative to the SN Ia rate, so the
        SN Ia rate is taken to be 1.

        Parameters
        -----------
        rate_Ia_r = 1 : the relative rates are relative to type Ia.  Fixed.
        """

        rate_Ia_r = 1.

        """
        SN II Rate Node.  The rate of SNe II relative to SNe Ia.

        pdf(rate_II_r)

        Along with the rate parameters is a rate model.

        There should be equivalent nodes for all other transient types being
        modeled.

        Parameters
        ----------
        rate_II_r : relative rate of SNe II compared to SNe Ia.
        """

        rate_II_r = Uniform('rate_II_r', lower=0.25, upper=4)

        """
        SN Ia luminosity Node.  (actually working in log-L)

        pdf(logL_snIa, sigma_snIa)

        For the moment consider the SN to be phase-independent with no
        internal parameters.  Eventually this will represent time-evolving
        SED, e.g. SALT2.

        Parameters
        ----------
        logL_snIa :  SN Ia mean log-luminosity
        sigma_snIa : intrinsic dispersion (mag)
        """

        logL_snIa = Normal('logL_snIa', mu=numpy.log(1), sd=0.02)
        sigma_snIa = Lognormal('sigma_snIa', mu=numpy.log(0.1), tau=1./0.1/0.1)

        """
        SN II luminosity Node.  (actually working in log-L)

        pdf(logL_snII, sigma_snII)

        Parameters
        ----------
        logL_snII :  SN II mean log-luminosity
        sigma_snII : intrinsic dispersion (mag)
        """

        logL_snII = Normal('logL_snII', mu=numpy.log(0.5), sd=0.02)
        sigma_snII = Lognormal('sigma_snII', mu=numpy.log(0.4), tau=1./0.1/0.1)

        """
        Enter the plate that considers one supernova at a time
        """

        # ``range`` (rather than the Python 2-only ``xrange``) keeps the loop
        # runnable on both Python 2 and Python 3.
        for i in range(nTrans):

            """
            Type Probability Node.  Probabilities of being a type of object.
            For now only SN Ia, and SN II.

            Dependencies
            -------------
            rate_Ia_r : Type Ia rate
            rate_II_r : Type II rate
            host galaxy : Not implemented now but eventually depends on host
                          properties

            Parameters
            ----------
            prob : probability of the object being a type Ia.  Fixed.
            """

            prob = rate_Ia_r/(rate_Ia_r+rate_II_r)

            """
            Type Node.  Not explicitly considered in our model.
            """

            """
            Observed Type Node and Luminosity Node.

            pdf(Obs type, Luminosity | Type prob, logL_snIa, logL_snII)

            There are two possibilities:

            1. There is an observed type assumed to be perfect.

                pdf(Obs type | Type) = delta(Obs type - Type)

                then

                pdf(Obs type, Luminosity | Type prob, logL_snIa, logL_snII)
                    = sum_i pdf(Obs type| Type_i) *
                        pdf(Luminosity | Type_i, logL_snIa, logL_snII) *
                        pdf(Type_i | Type prob)
                    = pdf(Luminosity | Type=Obs type, logL_snIa, logL_snII) *
                        pdf(Type=Obs type | Type prob)

                The class LogLuminosityGivenSpectype is responsible for
                providing this pdf

            2. There is no observed type.

                pdf(Luminosity | Type prob, logL_snIa, logL_snII)
                    = sum_i pdf(Luminosity | Type_i, logL_snIa, logL_snII) *
                        pdf(Type_i | Type prob)

                The class LuminosityMarginalizedOverType is responsible for
                providing this pdf

            Dependencies
            ------------
            prob :
            logL_snIa :
            sigma_snIa :
            logL_snII :
            sigma_snII :

            Parameters
            ----------
            obstype : observed type, SN Ia=0, SNII=1
                      Marginalized over
            Luminosity :
            """

            if observation['spectype'][i] == -1:
                # No spectroscopic type: marginalize over both populations.
                logluminosity = LogLuminosityMarginalizedOverType(
                    'logluminosity'+str(i),
                    mus=[logL_snIa, logL_snII],
                    sds=[numpy.log(10)/2.5*sigma_snIa,
                         numpy.log(10)/2.5*sigma_snII],
                    p=prob,
                    testval=1.)
            else:
                # Observed type: pick the matching population parameters.
                if observation['spectype'][i] == 0:
                    usemu = logL_snIa
                    usesd = numpy.log(10)/2.5*sigma_snIa
                    usep = prob
                else:
                    usemu = logL_snII
                    usesd = numpy.log(10)/2.5*sigma_snII
                    usep = 1-prob

                logluminosity = LogLuminosityGivenSpectype(
                    'logluminosity'+str(i),
                    mu=usemu, sd=usesd, p=usep)

            luminosity = T.exp(logluminosity)

            """
            Redshift Node.  Not considered explicitly in our model.
            """

            """
            Observed Redshift, Counts Node.

            pdf(observed redshift, Counts | Luminosity, Redshift, Cosmology, Calibration)
                = pdf(observed redshift| Redshift) *
                    pdf(Counts | Luminosity, Redshift, Cosmology, Calibration)

            The pdf of the observed redshift is assumed to be a sum of delta
            functions, perfectly measured redshift of the supernova or
            redshifts of potential galaxy hosts.

            pdf(observed redshift | Redshift)
                = sum_i p_i delta(observer redshift_i - Redshift)

            where p_i is the probability of observer redshift_i being the
            correct redshift.

            so

            pdf(observed redshift, Counts | Luminosity, Redshift, Cosmology, Calibration)
                = sum_i p_i pdf(Counts | Luminosity, Redshift=observer_redshift_i, Cosmology, Calibration)

            The class CountsWithThreshold handles this pdf

            Dependencies
            ------------
            luminosity  :   luminosity
            redshift    :   host redshift
            cosmology   :   cosmology
            Calibration :   calibration

            Parameters
            -----------
            observed_redshift
            Marginalized over counts
            """

            lds = []
            fluxes = []
            for z_ in observation['specz'][i]:
                # ld = 0.5/h0*(z_+T.sqr(z_))* \
                #     (1+ 1//T.sqrt((1+z_)**3 * (Om0 + (1-Om0)*(1+z_)**(3*w0))))
                ld = luminosity_distance(z_, Om0, w0)
                lds.append(ld)
                fluxes.append(luminosity/4/numpy.pi/ld**2)

            counts = Counts('counts'+str(i), fluxes=fluxes,
                            pzs=observation['zprob'][i], Z=zeropoints,
                            observed=observation['counts'][i])

            # The renormalization node is only added for transients that have
            # an observed spectroscopic type.
            if observation['spectype'][i] != -1:
                normalization = SampleRenormalization(
                    'normalization'+str(i), threshold=1e-9,
                    logL_snIa=logL_snIa, sigma_snIa=sigma_snIa,
                    logL_snII=logL_snII, sigma_snII=sigma_snII,
                    luminosity_distances=lds, Z=zeropoints,
                    pzs=observation['zprob'][i], prob=prob, observed=1)

    from pymc3 import find_MAP, NUTS, sample, summary
    from scipy import optimize

    with basic_model:

        backend = SQLite('trace.sqlite')

        # obtain starting values via MAP
        start = find_MAP(fmin=optimize.fmin_bfgs, disp=True)

        # draw 500 posterior samples
        trace = sample(500, start=start, trace=backend)

        summary(trace)
def posterior_summary(self, **kwargs):
    """Return ``pm.summary`` of the stored posterior.

    Every keyword argument is forwarded unchanged to ``pm.summary``.
    """
    posterior = self.posterior_
    return pm.summary(posterior, **kwargs)
def mixed_effects():
    """Fit the negative-binomial mixed-effects model and save diagnostic figures.

    The response is ``df['days_to_first_price_update']``; a separate intercept
    is estimated for each level of the grouping column (``FF49_industry``)
    and the remaining covariates share one slope each.  After Metropolis
    sampling the function writes the trace plots (``figure1_mixed.png`` ..
    ``figure3_mixed.png``), two forest plots and the posterior-predictive
    figure ``figure4-postpreddist-updown``.

    Relies on the module-level names ``df``, ``preprocessing``, ``pm``,
    ``tt``, ``np``, ``plt``, ``red``, ``entity_plotA`` and ``entity_plotB``.

    Returns
    -------
    None
    """
    le = preprocessing.LabelEncoder()
    # Convert categorical variables to integer
    # participants_idx = le.fit_transform(messages['prev_sender'])

    classes = 'FF49_industry'
    # classes = 'underwriter_tier'
    # classes = 'amends'

    print("Grouping by: {}".format(classes))

    FF49_industry = le.fit_transform(df['FF49_industry'])
    class_idx = le.fit_transform(df[classes])
    n_classes = len(le.classes_)

    NSamples = 50000
    # Floor division keeps ``burn`` an int on Python 3; a float here makes the
    # slice ``trace[-burn::thin]`` below raise TypeError.
    burn = NSamples // 10
    thin = 2

    covariates = [
        'Intercept',
        '#Syndicate Members',
        '#Lead Underwriters',
        'Underwriter Rank',
        # 'FF49 Industry',
        'Amends Down',
        '#S1A Amendments',
        'Share Overhang',
        'log(1+Sales)',
        'log(Proceeds)',
        'CASI',
        # 'media_1st_pricing',
        # 'VC',
        'IPO Market Returns',
        'Industry Returns',
        'BAA Spread',
    ]

    y = df['days_to_first_price_update'].values
    # y = np.ma.masked_values(list(df.days_to_first_price_update), value=-999)

    with pm.Model() as model:

        # Parameters:
        intercept = pm.Gamma('Intercept', alpha=.1, beta=.1, shape=n_classes)

        beta_underwriter_syndicate_size = pm.Normal('#Syndicate Members', mu=0, sd=20)
        beta_underwriter_num_leads = pm.Normal('#Lead Underwriters', mu=0, sd=20)
        beta_underwriter_rank_avg = pm.Normal('Underwriter Rank', mu=0, sd=20)
        beta_num_SEC_amendments = pm.Normal('#S1A Amendments', mu=0, sd=20)
        # beta_FF49_industry = pm.Normal('FF49 Industry', mu=0, sd=20)
        beta_amends_down = pm.Normal('Amends Down', mu=0, sd=20)
        beta_share_overhang = pm.Normal('Share Overhang', mu=0, sd=20)
        beta_log_sales = pm.Normal('log(1+Sales)', mu=0, sd=20)
        beta_log_proceeds = pm.Normal('log(Proceeds)', mu=0, sd=20)
        beta_CASI = pm.Normal('CASI', mu=0, sd=20)
        # beta_media_1st_pricing = pm.Normal('media_1st_pricing', mu=0, sd=20)
        # beta_VC = pm.Normal('VC', mu=0, sd=20)
        beta_BAA_spread = pm.Normal('BAA Spread', mu=0, sd=20)
        beta_M3_initial_returns = pm.Normal('IPO Market Returns', mu=0, sd=20)
        beta_M3_indust_rets = pm.Normal('Industry Returns', mu=0, sd=20)

        # Hyperparameters
        ## alpha: hyperparameters for neg-binom distribution
        alpha = pm.Gamma('alpha', alpha=.1, beta=.1)

        # #Poisson Model Formula
        mu = 1 + tt.exp(
            intercept[class_idx]
            + beta_underwriter_syndicate_size * df.underwriter_syndicate_size
            + beta_underwriter_num_leads * df.underwriter_num_leads
            + beta_underwriter_rank_avg * df.underwriter_rank_avg
            # + beta_FF49_industry * FF49_industry
            + beta_amends_down * df['Amends Down']
            + beta_num_SEC_amendments * df.num_SEC_amendments
            + beta_share_overhang * df['Share Overhang']
            + beta_log_sales * df['log(1+Sales)']
            + beta_CASI * df['CASI']
            + beta_log_proceeds * df['log(Proceeds)']
            # + beta_media_1st_pricing * df.media_1st_pricing
            # + beta_VC * df.VC
            + beta_BAA_spread * df['BAA Spread']
            + beta_M3_initial_returns * df.M3_initial_returns
            + beta_M3_indust_rets * df.M3_indust_rets
        )

        # Dependent Variable
        BoundedNegativeBinomial = pm.Bound(pm.NegativeBinomial, lower=1)
        y_est = BoundedNegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        y_pred = BoundedNegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        # y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.Poisson('y_est', mu=mu, observed=data)
        # y_pred = pm.Poisson('y_pred', mu=mu, shape=data.shape)

        start = pm.find_MAP()
        step = pm.Metropolis(start=start)
        # step = pm.NUTS()
        # backend = pm.backends.Text('test')
        # trace = pm.sample(NSamples, step, start=start, chain=1, njobs=2, progressbar=True, trace=backend)
        trace = pm.sample(NSamples, step, start=start, njobs=1, progressbar=True)

    trace2 = trace
    # NOTE(review): this keeps only the LAST ``burn`` draws (thinned), rather
    # than dropping the first ``burn`` - confirm that this is intended.
    trace = trace[-burn::thin]

    # waic = pm.waic(trace)
    # dic = pm.dic(trace)

    # with pm.Model() as model:
    #     trace_loaded = pm.backends.sqlite.load('FF49_industry.sqlite')
    #     y_pred.dump('FF49_industry_missing/y_pred')

    ## POSTERIOR PREDICTIVE CHECKS
    y_pred = trace.get_values('y_pred')
    pm.summary(trace, vars=covariates)

    # PARAMETER POSTERIORS
    anno_kwargs = {'xycoords': 'data', 'textcoords': 'offset points',
                   'rotation': 90, 'va': 'bottom', 'fontsize': 'large'}
    anno_kwargs2 = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 0, 'va': 'bottom', 'fontsize': 'large'}

    n0, n1, n2, n3 = 1, 5, 9, 14  # numbering for posterior plots

    # intercepts
    # mn = pm.df_summary(trace)['mean']['Intercept_log__0']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=blue, **anno_kwargs2)
    # mn = pm.df_summary(trace)['mean']['Intercept_log__1']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=purple, **anno_kwargs2)
    # coeffs
    # mn = pm.df_summary(trace)['mean'][2]
    # ax[1,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5, 10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][3]
    # ax[2,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][4]
    # ax[3,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # plt.savefig('figure1_mixed.png')

    # The summary table is the same for every figure, so compute it once
    # instead of once per traceplot and once per annotation loop.
    summary_df = pm.df_summary(trace)
    posterior_means = summary_df['mean']
    mean_lines = {k: v['mean'] for k, v in summary_df.iterrows()}

    # (variables to plot, rows of the summary to annotate, output file)
    figure_specs = [
        (['Intercept'] + trace.varnames[n0:n1], slice(n0, n1), 'figure1_mixed.png'),
        (trace.varnames[n1:n2], slice(n1, n2), 'figure2_mixed.png'),
        (trace.varnames[n2:n3], slice(n2, n3), 'figure3_mixed.png'),
    ]
    for plot_vars, rows, filename in figure_specs:
        axes = pm.traceplot(trace, plot_vars, lines=mean_lines)
        for i, mn in enumerate(posterior_means[rows]):
            # +1 because up and down intercept
            axes[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10),
                                color=red, **anno_kwargs)
        plt.savefig(filename)

    # _ = plt.figure(figsize=(5, 6))
    _ = pm.forestplot(trace, vars=['Intercept'], ylabels=le.classes_)
    plt.savefig('forestplot_intercepts.png')
    _ = pm.forestplot(trace, vars=covariates[1:], ylabels=covariates[1:])
    plt.savefig('forestplot_mixed.png')

    # pm.traceplot(trace, vars=['alpha', 'y_pred'])

    # def participant_y_pred(entity_name, burn=1000, hierarchical_trace=trace):
    #     """Return posterior predictive for person"""
    #     ix = np.where(le.classes_ == entity_name)[0][0]
    #     return hierarchical_trace['y_pred'][burn:, ix]

    def participant_y_pred(entity_name, burn=1000, ypred=y_pred):
        """Return posterior predictive for person"""
        ix = np.where(le.classes_ == entity_name)[0][0]
        return ypred[burn:, ix]

    days = 7

    fig = plt.figure(figsize=(16, 10))
    fig.add_subplot(221)
    entity_plotA('Up', days=days)
    fig.add_subplot(222)
    entity_plotB('Up')

    fig.add_subplot(223)
    entity_plotA('Down', days=days)
    fig.add_subplot(224)
    entity_plotB('Down')

    plt.savefig("figure4-postpreddist-updown")
def run(n=5000):
    """Draw ``n`` samples from ``model_1`` and call ``pm.summary`` on them."""
    with model_1:
        samples = pm.sample(n)
        pm.summary(samples)
def get_garch_model():
    """Build and return the ``garch`` pymc3 model on the fixed eight-point data.

    NOTE(review): ``alpha1`` has an unbounded Normal prior and ``beta1`` is
    only bounded from above, so the expression under ``tt.sqrt`` is not
    guaranteed to stay non-negative - confirm whether lower bounds are wanted.
    """
    r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
    alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])
    shape = r.shape

    # Prior hyper-parameters shared by the vague priors below.
    zero_mean = np.zeros(shape=shape)
    vague_sd = 1e6 * np.ones(shape=shape)

    with Model() as garch:
        alpha1 = Normal('alpha1', mu=np.zeros(shape=shape),
                        sd=np.ones(shape=shape), shape=shape)

        # beta1 is capped elementwise at 1 - alpha1.
        BoundedNormal = Bound(Normal, upper=(1 - alpha1))
        beta1 = BoundedNormal('beta1', mu=zero_mean, sd=vague_sd, shape=shape)

        mu = Normal('mu', mu=zero_mean, sd=vague_sd, shape=shape)

        squared_residual = tt.pow(r - mu, 2)
        squared_sigma = tt.pow(sigma1, 2)
        theta = tt.sqrt(alpha0 + alpha1 * squared_residual + beta1 * squared_sigma)

        Normal('obs', mu, sd=theta, observed=r)
    return garch


def run(n=1000):
    """Sample the GARCH model; ``n="short"`` requests a quick 50-draw run."""
    n = 50 if n == "short" else n
    with get_garch_model():
        return sample(n, n_init=10000)


if __name__ == '__main__':
    print(summary(run()))
refueling)) rough_weather = pmd.Bernoulli('Rough Weather', p=0.35) flight_time = pmc.Exponential('Flight Time', lam=0.5 - (0.1 * rough_weather)) arrival_traffic_delay = pmc.Wald('Arrival Traffic Delay', mu=0.1, lam=0.2) arrival_time = pm.Deterministic('Arrival time', departure_time + flight_time + arrival_traffic_delay) nb_samples = 500 with model: samples = pm.sample(draws=nb_samples, random_seed=1000) pm.summary(samples) ## Bayesian binomial-beta %matplotlib inline import pymc3 as pm import seaborn as sb from theano import config config.warn.round=False n = 1000 obs_v1 = 680 obs_v2 = 700 with pm.Model() as model: # context management # define priors prior_v1 = pm.Beta('prior_v1', alpha=2, beta=2)
# Linear regression of Y on X1 and X2 with Normal priors on the coefficients
# and a HalfNormal prior on the noise scale.
Hans_Model = pm.Model()
with Hans_Model:
    # Define prior
    alpha = pm.Normal('alpha_est', mu=0, sd=10)
    beta = pm.Normal('beta_est', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma_est', sd=1)

    # Model parameter
    mu = alpha + beta[0]*X1 + beta[1]*X2

    # Likelihood
    Y_rv = pm.Normal('Y_rv', mu=mu, sd=sigma, observed=Y)

# Model fitting
with Hans_Model:
    # step = pm.Metropolis(vars=[alpha,beta,sigma])
    # Start the slice sampler from the MAP estimate found with Powell's method.
    param_MAP = pm.find_MAP(fmin=sp.optimize.fmin_powell)
    Method = pm.Slice(vars=[alpha, beta, sigma])
    trace = pm.sample(Niter, step=Method, start=param_MAP)

    pm.traceplot(trace)
    # Single-argument print() behaves the same on Python 2 and Python 3; the
    # original print statement is a SyntaxError on Python 3.
    print(pm.summary(trace))

plt.show()

# # plt.plot(trace['alpha_est'])
# print(pm.summary(trace))
# plt.show()
import theano.tensor as T
from load_data import load_australian_credit, load_german_credit, load_heart, load_pima_indian
import pymc3 as pm
import numpy as np
from pymc3 import summary
from pymc3 import traceplot

# NOTE: despite the variable names, the Australian credit data set is loaded.
germanData, germanLabel = load_australian_credit()
# germanData, germanLabel = load_pima_indian()

# standardize so that each dimension has mean 0 and std 1
g_mean = np.mean(germanData, axis=0)
g_std = np.std(germanData, axis=0)
germanData = (germanData - g_mean) / g_std

# Bayesian logistic regression: Normal priors on the intercept and the
# coefficients, Bernoulli likelihood on the labels.
with pm.Model() as model:
    alpha = pm.Normal("alpha_pymc3", mu=0.0, tau=1e-2)
    beta = pm.Normal("beta_pymc3", mu=0.0, tau=1e-2, shape=14)  # for australian data, it has 14 predictors
    y_hat_prob = 1.0 / (1.0 + T.exp(-(T.sum(beta * germanData, axis=1) + alpha)))
    yhat = pm.Bernoulli("yhat", y_hat_prob, observed=germanLabel)
    trace = pm.sample(10000, pm.NUTS())

    trace1 = trace[5000:]  # get rid of the burn-in samples
    summary(trace1)
    traceplot(trace1)

alpha_mean = np.mean(trace1["alpha_pymc3"])
beta_mean = np.mean(trace1["beta_pymc3"], axis=0)
# Average over all 15 parameters (1 intercept + 14 coefficients).
param_mean = (np.sum(alpha_mean) + np.sum(beta_mean)) / 15.0

# One pre-joined string prints identically on Python 2 and Python 3 (the
# original print statement is Python 2-only).
print(" the overall mean of the parameters:  " + str(param_mean))
print(map_estimate) from pymc3 import NUTS, sample from pymc3 import traceplot with basic_model: # obtain starting values via MAP start = find_MAP(fmin=optimize.fmin_powell) # instantiate sampler step = NUTS(scaling=start) # draw 2000 posterior samples trace = sample(2000, step, start=start) trace['alpha'][-5:] traceplot(trace) plt.show() from pymc3 import summary summary(trace) n = 500 p = 0.3 with Model(): x = Normal('alpha', mu=0, sd=10) print type(x)
with mdl_ols: ## find MAP using Powell, seems to be more robust t1 = time.time() start_MAP = pm.find_MAP(fmin=optimize.fmin_powell) t2 = time.time() print("Found MAP, took %f seconds" % (t2 - t1)) ## take samples t1 = time.time() traces_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS(), progressbar=True) print() t2 = time.time() print("Done sampling, took %f seconds" % (t2 - t1)) pm.summary(traces_ols) ## plot the samples and the marginal distributions _ = pm.traceplot( traces_ols, figsize=(12, len(traces_ols.varnames) * 1.5), lines={k: v["mean"] for k, v in pm.df_summary(traces_ols).iterrows()}, ) plt.show() do_tstudent = False if do_tstudent: print("Robust Student-t analysis...")
else: fit_results = np.array([out.values['decay']*delta_t, np.sqrt(out.covar[0,0])*delta_t, out.values['amplitude'], np.sqrt(out.covar[1,1])]) print(out.fit_report(min_correl=0.25)) trace = sm.run(x=data, aB=alpha_B, bB=beta_B, aA=alpha_A, bA=beta_A, delta_t=delta_t, N=N) pm.summary(trace) traceB_results = np.percentile(trace['B'],(2.5,25,50,75,97.5)) traceB_results = np.concatenate((traceB_results, [np.std(trace['B'])], [np.mean(trace['B'])])) traceA_results=np.percentile(trace['A'],(2.5,25,50,75,97.5)) traceA_results = np.concatenate((traceA_results, [np.std(trace['A'])], [np.mean(trace['A'])])) results = np.concatenate((data_results, fit_results, traceB_results, traceA_results)) print(results) if result_array is None: result_array = results else: result_array = np.vstack((result_array, results))
import pymc3 as pm
import seaborn as sn
import matplotlib.pyplot as plt

# Four independent variables with no observed data: sample them, print a
# rounded summary table and show the trace plots.
with pm.Model() as model:
    uniform = pm.Uniform('uniform', lower=0, upper=1)
    normal = pm.Normal('normal', mu=0, sd=1)
    beta = pm.Beta('beta', alpha=0.5, beta=0.5)
    exponential = pm.Exponential('exponential', 1.0)

    trace = pm.sample(2000)

    summary_table = pm.summary(trace)
    print(summary_table.round(2))
    pm.traceplot(trace)

plt.show()
N_samples = [30, 30, 30]  # total number of each groups
G_samples = [18, 18, 18]  # record of the number of good-quality samples

# One group label per individual sample, e.g. [0]*30 + [1]*30 + [2]*30.
group_idx = np.repeat(np.arange(len(N_samples)), N_samples)

# Expand the per-group counts into individual 1 (good) / 0 (bad) outcomes.
data = []
for n_total, n_good in zip(N_samples, G_samples):
    data.extend(np.repeat([1, 0], [n_good, n_total - n_good]))
print(group_idx, data)

base_name = os.path.basename(__file__)[:-3]  # script name without ".py"
with pm.Model() as model_h,\
        matplotlib.backends.backend_pdf.PdfPages('%s.pdf' % base_name) as pdf_all:
    # prior
    alpha = pm.HalfCauchy('alpha', beta=10)
    beta = pm.HalfCauchy('beta', beta=10)
    theta = pm.Beta('theta', alpha, beta, shape=len(N_samples))
    # likelihood
    y = pm.Bernoulli('y', p=theta[group_idx], observed=data)

    trace = pm.sample(2000, njobs=1)
    chain = trace[200:]  # drop the first 200 draws as burn-in

    fig = plt.figure()
    pm.traceplot(chain)
    pdf_all.savefig()

    # mean, standard deviation, and the HPD intervals
    # Summarise the same burn-in-free chain that is plotted above, not the
    # raw trace.
    print(pm.summary(chain))
step1 = pm.NUTS([Pb, mub, sb, b, m]) step2 = pm.BinaryMetropolis([qi], tune_interval=100) step = [step1, step2] samples = pm.sample(niter, start=start_MAP, step=[step1, step2], progressbar=True) ## Declare a point as an outlier if its qi is 0 in more than 99% of the MCMC samples cutoff = 1 outlier = np.percentile(1 - samples[burnin:]["qi"], cutoff, axis=0) outlier = outlier.astype(bool) # the variable 'outlier' is an array of size N with True for outlier points and False for inlier points # the points that are identified as outlier can change from run to run, especially if niter is small ## print a summary of the results pm.summary(samples[burnin:]) ## plot the samples and the marginal distributions ## using the built-in PyMC3 functions pm.traceplot(samples[burnin:]) plt.show() ## in the previous plots and results you will also see ## the parameters s_b_log and Pb_interval ## these are created automatically by PyMC3 ## but we don't need to worry about them # the following two definitions of the function 'lm' are equivalent