def test_time_varying_model():
    np.random.seed(1039)

    data = gen_toy_data()

    formula_str = "1 + C(weekday)"
    X_df = patsy.dmatrix(formula_str, data, return_type="dataframe")
    X_np = X_df.values

    xi_shape = X_np.shape[1]

    xi_0_true = np.array([2.0, -2.0, 2.0, -2.0, 2.0, -2.0, 2.0]).reshape(xi_shape, 1)
    xi_1_true = np.array([2.0, -2.0, 2.0, -2.0, 2.0, -2.0, 2.0]).reshape(xi_shape, 1)

    xis_rv_true = np.stack([xi_0_true, xi_1_true], axis=1)

    with pm.Model(**TV_CONFIG) as sim_model:
        _ = create_dirac_zero_hmm(
            X_np, mu=1000, xis=xis_rv_true, observed=np.zeros(X_np.shape[0])
        )

    sim_point = pm.sample_prior_predictive(samples=1, model=sim_model)
    y_t = sim_point["Y_t"].squeeze().astype(int)

    split = int(len(y_t) * 0.7)

    train_y, test_V = y_t[:split], sim_point["V_t"].squeeze()[split:]
    train_X, test_X = X_np[:split, :], X_np[split:, :]

    X = shared(train_X, name="X", borrow=True)
    Y = shared(train_y, name="y_t", borrow=True)

    with pm.Model() as model:
        xis_rv = pm.Normal("xis", 0, 10, shape=xis_rv_true.shape)
        _ = create_dirac_zero_hmm(X, 1000, xis_rv, Y)

    number_of_draws = 500

    with model:
        steps = [
            FFBSStep([model.V_t]),
            pm.NUTS(
                vars=[
                    model.gamma_0,
                    model.Gamma,
                ],
                target_accept=0.90,
            ),
        ]

    with model:
        posterior_trace = pm.sample(
            draws=number_of_draws,
            step=steps,
            random_seed=100,
            return_inferencedata=True,
            chains=1,
            cores=1,
            progressbar=True,
            idata_kwargs={"dims": {"Y_t": ["date"], "V_t": ["date"]}},
        )

    # Update the shared variable values
    Y.set_value(np.ones(test_X.shape[0], dtype=Y.dtype))
    X.set_value(test_X)

    model.V_t.distribution.shape = (test_X.shape[0],)

    hdi_data = az.hdi(posterior_trace, hdi_prob=0.95, var_names=["xis"]).to_dataframe()
    hdi_data = hdi_data.unstack(level="hdi")

    xis_true_flat = xis_rv_true.squeeze().flatten()
    check_idx = ~np.in1d(
        np.arange(len(xis_true_flat)), np.arange(3, len(xis_true_flat), step=4)
    )

    assert np.all(
        xis_true_flat[check_idx] <= hdi_data["xis", "higher"].values[check_idx]
    )
    assert np.all(
        xis_true_flat[check_idx] >= hdi_data["xis", "lower"].values[check_idx]
    )

    trace = posterior_trace.posterior.drop_vars(["Gamma", "V_t"])

    with aesara.config.change_flags(compute_test_value="off"):
        adds_pois_ppc = pm.sample_posterior_predictive(
            trace, var_names=["V_t", "Y_t", "Gamma"], model=model
        )

    assert (np.abs(adds_pois_ppc["V_t"] - test_V) / test_V.shape[0]).mean() < 1e-2
def wake_mode_use():
    wake_data.sort_values('time_offset', inplace=True)
    time = np.array(wake_data.loc[:, 'time_offset'])
    wake_obs = np.array(wake_data.loc[:, 'indicator'])

    # --------------------------------- Wake-data model ---------------------------------
    with pm.Model() as wake_model:
        alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
        beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)
        p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
        observed = pm.Bernoulli('obs', p, observed=wake_obs)
        step = pm.Metropolis()
        wake_trace = pm.sample(N_SAMPLES, step=step)

    # ------------------------------ Posterior probability from the samples (first 100 discarded) ------------------------------
    alpha_samples = wake_trace["alpha"][100:, None]
    beta_samples = wake_trace["beta"][100:, None]

    time_est = np.linspace(time.min() - 15, time.max() + 15, int(1e3))[:, None]
    alpha_est = alpha_samples.mean()
    beta_est = beta_samples.mean()
    wake_est = logistic(time_est, beta=beta_est, alpha=alpha_est)

    figsize(13, 6)
    plt.plot(time_est, wake_est, color='darkred', lw=3,
             label="Average posterior probability of sleep (wake model)")
    plt.scatter(time, wake_obs, edgecolor='r', facecolor='r', s=50,
                alpha=0.05, label='Observations')
    plt.title('Posterior probability from %d samples' % N_SAMPLES)
    plt.legend(prop={'size': 14})
    plt.ylabel('Probability')
    plt.xlabel('AM time')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], wake_labels)
    plt.show()

    print('The probability of being awake exceeds 50% at 6:{} AM.'.format(
        int(time_est[np.where(wake_est < 0.5)][0])))

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list("BMH", colors)
    figsize(12, 6)
    probs = wake_trace['p']

    # ------------------------------ Probability of sleep during the morning ------------------------------
    plt.scatter(time, probs.mean(axis=0), cmap=cmap, c=probs.mean(axis=0), s=50)
    plt.title('Probability of sleep during the morning')
    plt.xlabel('AM time')
    plt.ylabel('Probability')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], wake_labels)
    plt.show()

    print('Probability of being awake at 5:30 AM: {:.2f}%.'.format(
        100 - (100 * logistic(-30, beta=beta_est, alpha=alpha_est))))
    print('Probability of being awake at 6:00 AM: {:.2f}%.'.format(
        100 - (100 * logistic(0, beta=beta_est, alpha=alpha_est))))
    print('Probability of being awake at 6:30 AM: {:.2f}%.'.format(
        100 - (100 * logistic(30, beta=beta_est, alpha=alpha_est))))
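# Hedged note: `logistic`, `figsize`, `wake_data`, `wake_labels`, and `N_SAMPLES` are
# defined elsewhere in the original script. Below is a minimal sketch of the `logistic`
# helper, written to match the deterministic `p` used in the model above; it is an
# assumption, not the original definition.
def logistic(x, beta, alpha=0):
    # Same functional form as the Deterministic 'p' in the models above.
    return 1.0 / (1.0 + np.exp(beta * x + alpha))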
def test_stable(self):
    X = np.random.uniform(low=320., high=400., size=[2000, 2])
    with pm.Model() as model:
        cov = pm.gp.cov.ExpQuad(2, 0.1)
    dists = theano.function([], cov.square_dist(X, X))()
    assert not np.any(dists < 0)
# start = pm.find_MAP()
#
# step = pm.Metropolis()
# trace2 = pm.sample(4000, start=start)
# chain2 = trace2
# varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
# pm.traceplot(chain2, varnames1)
# plt.show()
#
# # Plot the autocorrelation curves
# pm.autocorrplot(chain2)
# plt.show()
# ======================================================================
# After switching to a Weibull distribution, the parameters still need tuning
#
# partial_model: partial pooling model
with pm.Model() as mulpartial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 20)
    # sigma = pm.Normal('sigma', 0, 20)
    # nu = pm.Exponential('nu', 1/30)
    # mu_a = pm.Uniform('mu_a', -10, 10)
    # sigma_a = pm.HalfNormal('sigma_a', sd=10)
    # mu_a = pm.Uniform('mu_a', -10, 10)
    # sigma_a = pm.HalfNormal('sigma_a', sd=100)
    beta = pm.Normal('beta', 0, 100, shape=companiesABC)
    beta1 = pm.Normal('beta1', 0, 20)
    beta2 = pm.Normal('beta2', 0, 100)
    beta3 = pm.Normal('beta3', 0, 20)
    # beta4 = pm.Normal('beta4', 0, 20)
sig2N = 10
sig2P = 25
muP = 0

# Predictor variable
Sest = np.zeros(N)
Sreal = np.zeros(N)
Xall = np.zeros(N)

for ii in range(N):
    X1 = np.random.rand(1) * 10 - 5
    lamb = 3 * np.exp(-(X1 - phi)**2 / (2.0 * sig2N))
    R = np.random.poisson(lamb, size=(n_input)).astype('float32')

    basic_model = pm.Model()
    with basic_model:
        # Priors for unknown model parameters
        alpha = pm.Normal('alpha', mu=muP, sd=sig2P)
        # Expected value of outcome
        mu = 3 * np.exp(-(alpha - phi)**2 / (2.0 * sig2N))
        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Poisson('Y_obs', mu=mu, observed=R)

    map_estimate = pm.find_MAP(model=basic_model)

    a = np.ones(n_input) / sig2N
    e = phi / sig2N
# 50%      52.760000
# 75%      54.595000
# max      57.480000

data.describe()
# count    48.000000
# mean     53.496458
# std       3.456198
# min      47.720000
# 25%      51.582500
# 50%      52.875000
# 75%      54.960000
# max      68.580000

# normal
with pm.Model() as model_g:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1000)

# students t
with pm.Model() as model_t:
    mu = pm.Uniform('mu', 40, 70)
    sigma = pm.HalfNormal('sigma', sd=10)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=v, observed=data)
    trace_t = pm.sample(1000)

data2 = Series(data, copy=True)
data2[48] = 65
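# A hedged follow-up sketch (not in the original): compare the Gaussian and Student-t
# models above with WAIC via ArviZ; assumes `arviz` is installed.
import arviz as az

model_comparison = az.compare(
    {"normal": az.from_pymc3(trace_g, model=model_g),
     "student_t": az.from_pymc3(trace_t, model=model_t)},
    ic="waic",
)
print(model_comparison)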
# prepare data
milk = pd.read_csv('Data/milk.csv', sep=';')
milk.shape
d = milk.dropna().copy()
d.shape
d.columns
d['neocortex'] = d['neocortex.perc'] / 100
d[['neocortex', 'neocortex.perc']]
d['lmass'] = np.log(d['mass'])

# fit models
d['kcal.per.g'].describe()

# m6_11 = pm.Model()
with pm.Model() as m6_11:
    alpha = pm.Uniform('alpha', 0, 5)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma),
                      observed=d['kcal.per.g'])

pm.find_MAP(model=m6_11, method='BFGS')

with m6_11:
    trace = pm.sample(2000, return_inferencedata=True, chains=2)

pm.summary(trace)
az.summary(trace)
# pm.gelman_rubin(trace)

with m6_11:
    az.plot_trace(trace)
plt.show()
def test_discrete_continuous(self):
    with pm.Model() as model:
        a = pm.Poisson("a", 5)
        b = pm.HalfNormal("b", 10)
        y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
        trace = pm.sample_smc()
# generate some data
y = np.zeros((len(timerange)))
for i in range(len(timerange)):
    y[i] = alpha + beta * timerange[i]**2 + np.random.rand()
y = [y, y + np.random.rand(len(y))]


def sys_model(alpha, beta):
    return alpha + beta * timerange**2


# plt.figure()
# plt.scatter(timerange, y)

time_varying_model = pm.Model()

with time_varying_model:
    # Set up priors
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta = pm.Normal("beta", mu=0, sigma=10)
    sigma = pm.HalfNormal("sigma", sigma=1)

    # System model
    mu = sys_model(alpha, beta)

    # Likelihood of observations
    Y = pm.Normal("y", mu=mu, sigma=sigma, observed=y)

    # Sampler to use
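    # Hedged sketch of the sampling step the comment above points to; the draw and
    # tune counts here are illustrative assumptions, not the original values.
    trace = pm.sample(1000, tune=1000)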
V_obs2 = V_obs2 - V_gas2

M_R_bulge = []
for i in Radial_distance:
    M_R_bulge.append(simpsons_integration(0.0001, i, 5000, Bulge) / i)
M_R_bulge = np.array(M_R_bulge)

M_R_disk = []
for i in Radial_distance:
    M_R_disk.append(simpsons_integration(0.0001, i, 5000, Disc) / i)
M_R_disk = np.array(M_R_disk)

total_model = pm.Model()

with total_model:
    # priors
    sigma = pm.HalfNormal("sigma", sigma=0.4)
    gamma = pm.Gamma("gamma", alpha=3, beta=1)
    ah = pm.Gamma("ah", alpha=3, beta=1)
    Mh = pm.Gamma("Mh", alpha=3, beta=1)
    M_by_L_bulge = pm.Gamma("M_by_L_bulge", alpha=3, beta=1)
    M_by_L_disk = pm.Gamma("M_by_L_disc", alpha=3, beta=1)

    bulge_rot = M_by_L_bulge * M_R_bulge
    disk_rot = M_by_L_disk * M_R_disk
    halo_rot = (Mh * Radial_distance**(gamma - 1)) / ((ah**gamma) * (1 + ((Radial_distance / ah)**(gamma - 1))))
    total_rot = bulge_rot + disk_rot + halo_rot
import matplotlib.pyplot as plt
import pymc3 as pm
import numpy as np

# import pydevd
# pydevd.set_pm_excepthook()
np.seterr(invalid='raise')

data = np.random.normal(size=(2, 20))
model = pm.Model()

with model:
    x = pm.Normal('x', mu=.5, tau=2.**-2, shape=(2, 1))
    z = pm.Beta('z', alpha=10, beta=5.5)
    d = pm.Normal('data', mu=x, tau=.75**-2, observed=data)
    step = pm.NUTS()


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n, step)

    plt.subplot(2, 2, 1)
    plt.plot(trace[x][:, 0, 0])
    plt.subplot(2, 2, 2)
    plt.hist(trace[x][:, 0, 0])
    plt.subplot(2, 2, 3)
plt.xlabel("$x_{}$".format(idx)) plt.ylabel("$y$", rotation=0) plt.subplot(2, 2, idx + 2) plt.scatter(x[0], x[1]) plt.xlabel("$x_{}$".format(idx - 1)) plt.xlabel("$x_{}$".format(idx), rotation=0) X = np.vstack((x_0, x_1)) scatter_plot(X, y) plt.savefig("masking_effect_variables_data.png") plt.close() with pm.Model() as model_ma: alpha = pm.Normal("alpha", mu=0, sd=10) beta = pm.Normal("beta", mu=0, sd=10, shape=2) epsilon = pm.HalfCauchy("epsilon", 5) mu = alpha + pm.math.dot(beta, X) y_pred = pm.Normal("y_pred", mu=mu, sd=epsilon, observed=y) start = pm.find_MAP() step = pm.NUTS(scaling=start) trace_ma = pm.sample(5000, step=step, start=start) pm.traceplot(trace_ma) plt.savefig("masking_effect_variables_traceplot.png") plt.close()
    for g in range(12, n_genes):
        plt.subplot(2, 3, g - 11)
        plt.plot(times, np.nanmean(replicatsCRTG[c, :, :, g], axis=0),
                 label='measures', marker='x')
        plt.plot(times, theory[c, :, g, 0], label='model predicted, of which')
        plt.plot(times, theory1[c, :, g, 0], label='1. own contribution')
        plt.plot(times, theory2[c, :, g, 0], label='2. production due to TF')
        plt.step(times, piecewiseeta[c, :, 0, -1], where='post', label='TF activity')
        plt.title('gene' + str(genes_numbers[g]))
        plt.legend()
        plt.grid(True)
    plt.savefig(figure2_title + '.png')
    plt.show()
"""

# 4th step: find better values of the parameters
with pm.Model() as Basic_model:
    # Priors for unknown model parameters
    alpha = pm.Uniform('alpha', lower=0.2 * alpha0, upper=2. * alpha0,
                       shape=n_genes_under_TF_control)
    # alpha = pm.Gamma('alpha', mu=alpha0, sigma=alpha0, shape=n_genes_under_TF_control)
    beta = pm.Uniform('beta', lower=0.2 * beta0, upper=2. * beta0,
                      shape=n_genes_under_TF_control)
    # beta = pm.Gamma('beta', mu=beta0, sigma=beta0, shape=n_genes_under_TF_control)
    gamma = pm.Uniform('gamma', lower=0.2 * gamma0, upper=2. * gamma0,
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import matplotlib.pyplot as plt

iris = sns.load_dataset('iris')
df = iris.query("species == ('setosa', 'versicolor')")
y_0 = pd.Categorical(df['species']).codes
x_n = 'sepal_length'
x_0 = df[x_n].values
y_0 = np.concatenate((y_0, np.ones(6)))
x_0 = np.concatenate((x_0, [4.2, 4.5, 4.0, 4.3, 4.2, 4.4]))
x_0_m = x_0 - x_0.mean()

plt.plot(x_0, y_0, 'o', color='k')
plt.show()

with pm.Model() as model_rlg:
    alpha_tmp = pm.Normal('alpha_tmp', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=10)
    mu = alpha_tmp + beta * x_0_m
    theta = pm.Deterministic('theta', 1 / (1 + pm.math.exp(-mu)))

    pi = pm.Beta('pi', 1, 1)
    p = pi * 0.5 + (1 - pi) * theta

    alpha = pm.Deterministic('alpha', alpha_tmp - beta * x_0.mean())
    bd = pm.Deterministic('bd', -alpha / beta)

    yl = pm.Bernoulli('yl', p=p, observed=y_0)

    trace_rlg = pm.sample(2000, start=pm.find_MAP())
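# A hedged follow-up sketch (not part of the original snippet): plot the mean of the
# fitted sigmoid and the decision boundary implied by the posterior samples.
theta_mean = trace_rlg['theta'].mean(axis=0)
order = np.argsort(x_0)
plt.plot(x_0[order], theta_mean[order], color='C2', lw=3)
plt.vlines(trace_rlg['bd'].mean(), 0, 1, colors='k', linestyles='--')
plt.plot(x_0, y_0, 'o', color='k')
plt.xlabel(x_n)
plt.ylabel(r'$\theta$', rotation=0)
plt.show()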
plt.figure()

data = np.array([
    51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65, 51.49,
    51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48, 57.44, 55.14,
    53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73, 51.94, 54.95, 50.39,
    52.91, 51.50, 52.68, 47.72, 49.73, 51.82, 54.99, 52.84, 53.19, 54.52,
    51.46, 53.73, 51.61, 49.81, 52.42, 54.30, 53.84, 53.16
])

sns.kdeplot(data)
plt.savefig('img302.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

with pm.Model() as model_g:
    mu = pm.Uniform('mu', 40, 75)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1100, njobs=1)

chain_g = trace_g[100:]
pm.traceplot(chain_g)
plt.savefig('img304.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

df = pm.summary(chain_g)

y_pred = pm.sample_ppc(chain_g, 100, model_g, size=len(data))
sns.kdeplot(data, c='b')
def fit(self, X, y, y_error=1, x_error=None, *,
        sample_kwargs={'draws': 1000, 'target_accept': 0.9}):

    kwds = {}
    if self.kwds is not None:
        kwds.update(self.kwds)
    kwds['fit_intercept'] = False
    model = self._choose_regressor()
    self.clf_ = model(**kwds)
    self.fit_intercept = False

    if x_error is not None:
        x_error = np.atleast_2d(x_error)

    with pm.Model():
        # slope and intercept of eta-ksi relation
        slope = pm.Flat('slope', shape=(X.shape[0], ))
        inter = pm.Flat('inter')

        # intrinsic scatter of eta-ksi relation
        int_std = pm.HalfFlat('int_std')
        # standard deviation of Gaussian that ksi are drawn from (assumed mean zero)
        tau = pm.HalfFlat('tau', shape=(X.shape[0], ))
        # intrinsic ksi
        mu = pm.Normal('mu', mu=0, sigma=tau, shape=(X.shape[0], ))

        # Some wizarding with the dimensions all around.
        ksi = pm.Normal('ksi', mu=mu, tau=tau, shape=X.T.shape)

        # intrinsic eta-ksi linear relation + intrinsic scatter
        eta = pm.Normal('eta',
                        mu=(tt.dot(slope.T, ksi.T) + inter),
                        sigma=int_std,
                        shape=y.shape)

        # observed xi, yi
        x = pm.Normal('xi', mu=ksi.T, sigma=x_error, observed=X, shape=X.shape)
        y = pm.Normal('yi', mu=eta, sigma=y_error, observed=y, shape=y.shape)

        self.trace = pm.sample(**sample_kwargs)

        # TODO big: make it optional to choose a way to define best
        # TODO quick: use np.histogramdd
        H2D, bins1, bins2 = np.histogram2d(self.trace['slope'][:, 0],
                                           self.trace['inter'], bins=50)
        w = np.where(H2D == H2D.max())

        # choose the maximum posterior slope and intercept
        slope_best = bins1[w[0][0]]
        intercept_best = bins2[w[1][0]]
        self.clf_.coef_ = np.array([intercept_best, slope_best])

        return self
def MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, output_folder):
    # This is the data preprocessing part
    n = np.shape(DataField)[0]  # number of measured data
    m = np.shape(DataComp)[0]   # number of simulation data

    p = np.shape(DataField)[1] - n_y      # number of input x
    q = np.shape(DataComp)[1] - p - n_y   # number of calibration parameters t

    xc = DataComp[:, n_y:]   # simulation input x + calibration parameters t
    xf = DataField[:, n_y:]  # observed input

    yc = DataComp[:, :n_y]   # simulation output
    yf = DataField[:, :n_y]  # observed output

    x_pred = DataPred[:, n_y:]  # design points for predictions
    y_true = DataPred[:, :n_y]  # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0]  # number of predictions
    N = n + m + n_pred

    # Put points xc, xf, and x_pred on [0,1]
    for i in range(p):
        x_min = min(min(xc[:, i]), min(xf[:, i]))
        x_max = max(max(xc[:, i]), max(xf[:, i]))
        xc[:, i] = (xc[:, i] - x_min) / (x_max - x_min)
        xf[:, i] = (xf[:, i] - x_min) / (x_max - x_min)
        x_pred[:, i] = (x_pred[:, i] - x_min) / (x_max - x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p, (p + q)):
        t_min = min(xc[:, i])
        t_max = max(xc[:, i])
        xc[:, i] = (xc[:, i] - t_min) / (t_max - t_min)

    # store mean and std of yc for future scale-back use
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:, i])
        yc_sd[i] = np.std(yc[:, i])
        yc[:, i] = (yc[:, i] - yc_mean[i]) / yc_sd[i]
        yf[:, i] = (yf[:, i] - yc_mean[i]) / yc_sd[i]

    # This is the modeling part
    with pm.Model() as model:
        # Claim prior part
        eta1 = pm.HalfCauchy("eta1", beta=5)  # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p + q))  # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q)  # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5)  # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred, n_y))  # for y prediction

        # Setup prior of right cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2, sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n, q]), tf)], axis=1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred, q]), tf)], axis=1)
        X = tt.concatenate([xf1, xc, x_pred1], axis=0)
        # Concatenate data into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis=0)

        # Covariance function of the gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p + q), ls=lengthscale)

        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func=cov_z)

        # Bayesian inference
        matrix_shape = [n + m + n_pred, n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol,
                                         noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250, cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y' + str(j + 1) + '_pred' + str(i + 1))
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(500, n_pred * n_y),
                                columns=name_columns)

    # Draw picture of cvrmse_dist and calculate index
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:, index])
        y_prediction1 = y_prediction1 * yc_sd[i] + yc_mean[i]  # Scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred' + str(i + 1) + '.csv')  # Store y_prediction

        # Calculate the distribution of cvrmse
        cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction1 - y_true[:, i]), axis=1) / n_pred) / np.mean(y_true[:, i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1, y_true[:, i]).to_csv(output_folder + '/index' + str(i + 1) + '.csv')

        # Draw picture of the cvrmse distribution of each y
        plt.subplot(n_y, 1, i + 1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # Draw picture of Prediction_Plot
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))

        y_prediction_mean = np.array(pm.summary(trace)['mean'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index]) * yc_sd[i] + yc_mean[i]

        plt.subplot(n_y, 1, i + 1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred), y=y_true[:, i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # This is just to print the original cvrmse to test whether the outcome is good
        if i == 0:
            cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction_mean - y_true[:, 0])) / len(y_prediction_mean - y_true[:, 0])) / np.mean(y_true[:, 0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
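# Hypothetical invocation sketch (our addition, not from the original script): toy arrays
# laid out the way the function expects ([outputs | inputs x | calibration t] for DataComp,
# [outputs | inputs x] for DataField and DataPred). The shapes and folder name are
# illustrative assumptions only.
import os

n_y_demo = 2
DataComp_demo = np.random.rand(30, n_y_demo + 1 + 1)  # 2 outputs, 1 input x, 1 calibration t
DataField_demo = np.random.rand(10, n_y_demo + 1)     # 2 outputs, 1 input x
DataPred_demo = np.random.rand(5, n_y_demo + 1)       # 2 true outputs, 1 design-point input

demo_folder = './calibration_output'
os.makedirs(demo_folder, exist_ok=True)
MultiOutput_Bayesian_Calibration(n_y_demo, DataComp_demo, DataField_demo, DataPred_demo, demo_folder)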
def three_var_model():
    with pm.Model() as model:
        pm.HalfNormal('one', shape=(10, 2), total_size=100)
        pm.Normal('two', shape=(10, ))
        pm.Normal('three', shape=(10, 1, 2))
    return model
def lm(x, y, trace=None, credible_interval=0.95, ax=None, bandalpha=0.6,
       scatter_kws={}, **kwargs):
    """Make a custom linear model plot with confidence bands.

    Args:
        x (array like): x values
        y (array like): y values
        trace (pymc3.MultiTrace, optional): GLM trace from PyMC3.
        credible_interval (float, optional): Width of the posterior predictive band.
        ax (matplotlib.axis, optional): Axis to plot on. Defaults to current axis.
        bandalpha (float, optional): Opacity level of confidence band.
        scatter_kws (dict, optional): Dictionary of keyword arguments passed onto
            `scatter`.
        **kwargs: Keyword arguments passed onto plot of regression line.

    Returns:
        tuple: Axis with the linear model plot, the trace, and the summary table.
    """
    if ax is None:
        ax = plt.gca()

    # Determine color (this is necessary so that the scatter and the line have the same color)
    color = next(ax._get_lines.prop_cycler)["color"]

    # Scatter
    print(scatter)
    ax = scatter(x, y, color=color, ax=ax, **scatter_kws)

    # Run GLM in PyMC3
    if trace is None:
        df = pd.DataFrame(dict(x=x, y=y))
        with pm.Model() as glm:
            pm.GLM.from_formula("y ~ x", data=df)
            trace = pm.sample()
    summary = pm.summary(trace)

    # Plot MAP regression line
    xs = np.linspace(np.min(x), np.max(x), 100)
    intercept = summary.loc["Intercept", "mean"]
    beta = summary.loc["x", "mean"]
    ax.plot(xs, intercept + beta * xs, color=color, zorder=4, **kwargs)

    # Plot posterior predictive credible region band
    intercept_samples = trace.get_values("Intercept")
    beta_samples = trace.get_values("x")
    ypred = intercept_samples + beta_samples * xs[:, None]
    ypred_lower = np.quantile(ypred, (1 - credible_interval) / 2, axis=1)
    ypred_upper = np.quantile(ypred, 1 - (1 - credible_interval) / 2, axis=1)
    ax.fill_between(
        xs,
        ypred_lower,
        ypred_upper,
        color=color,
        zorder=1,
        alpha=bandalpha,
        linewidth=0,
    )

    return ax, trace, summary
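# Illustrative usage (an assumption, not part of the original module): fit and plot a
# small synthetic dataset with the helper above. This relies on the module's own
# `scatter` helper that `lm` calls internally.
rng = np.random.default_rng(0)
x_demo = np.linspace(0, 1, 50)
y_demo = 1.0 + 2.0 * x_demo + rng.normal(scale=0.2, size=50)
ax, trace, summary = lm(x_demo, y_demo)
plt.show()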
def train(self, fol_path='../data/*'):
    fol_list = glob.glob(fol_path)
    print(fol_list)

    seq_list = []
    for fol in fol_list:
        f_list = glob.glob(fol + '/*.jpg')
        im_list = []
        for f in sorted(f_list):
            # Crop to ultrasound active area
            im = np.mean(cv2.resize(
                cv2.imread(f)[180:700, 500:1020, :], (self.w, self.h)),
                axis=-1)
            im_list.append(im)
        seq_list.append(np.array(im_list))

    # Get latent states
    self.latent_list = []
    for s in seq_list[:-1]:
        self.latent_list.append(
            self.vae_model.encoder.predict(
                s.reshape(-1, self.w, self.h, 1) / 255.0)[0])
    self.latent = np.vstack(self.latent_list)
    np.savetxt(self.log_path + 'latent.txt', self.latent)

    # Generate training pairs
    print('Generating training pairs')
    G = self.generate_pairs(self.latent_list)
    W = np.arange(self.latent.shape[0]).astype(int)
    Gt = tt.as_tensor(G)
    W = W.astype(int)
    Xt = tt.as_tensor(self.latent)

    with pm.Model() as reward_model:
        l = pm.Gamma("l", alpha=2.0, beta=0.5)
        cov_func = pm.gp.cov.Matern32(self.latent.shape[1], ls=l)

        Xu = pm.gp.util.kmeans_inducing_points(self.Ni, self.latent)
        sig = pm.HalfCauchy("sig",
                            beta=np.ones((self.latent.shape[0], )),
                            shape=self.latent.shape[0])

        gp = pm.gp.MarginalSparse(cov_func=cov_func)
        f = gp.marginal_likelihood('reward',
                                   Xt,
                                   Xu,
                                   shape=self.latent.shape[0],
                                   y=None,
                                   noise=sig,
                                   is_observed=False)

        diff = f[Gt[:, 0]] - f[Gt[:, 1]]
        p = pm.math.sigmoid(diff)
        wl = pm.Bernoulli('observed wl',
                          p=p,
                          observed=np.ones((G.shape[0], )),
                          total_size=self.latent.shape[0])

        inference = pm.ADVI()
        train_probs = inference.approx.sample_node(p)
        train_accuracy = (train_probs > 0.5).mean(-1)
        eval_tracker = pm.callbacks.Tracker(train_accuracy=train_accuracy.eval)
        approx = inference.fit(1000,
                               obj_optimizer=pm.adam(learning_rate=0.1),
                               callbacks=[eval_tracker])
        trace = approx.sample(5000)

    l = np.mean(trace['l'])
    sig = np.mean(trace['sig'])
    reward = np.mean(trace['reward'], axis=0)

    np.savetxt('./logs/l.txt', np.array([l]))
    np.savetxt('./logs/sig.txt', np.array([sig]))
    np.savetxt('./logs/reward.txt', reward)
    print('Saved trained reward parameters')

    return l, sig, reward
    FontPath = '/usr/share/fonts/truetype/takao-gothic/TakaoPGothic.ttf'
else:
    print('You are using an OS that this Python code does not support.')
    sys.exit()
jpfont = FontProperties(fname=FontPath)

#%% Generate data from the regression model
n = 50
np.random.seed(99)
u = st.norm.rvs(scale=0.7, size=n)
x = st.uniform.rvs(loc=-np.sqrt(3.0), scale=2.0 * np.sqrt(3.0), size=n)
y = 1.0 + 2.0 * x + u

#%% Set up the posterior for the regression coefficients and the error variance
#   (Laplace priors on the coefficients + half-Cauchy prior on the scale)
b0 = np.zeros(2)
tau_coef = np.ones(2)
tau_sigma = 1.0

regression_laplace_halfcauchy = pm.Model()
with regression_laplace_halfcauchy:
    sigma = pm.HalfCauchy('sigma', beta=tau_sigma)
    a = pm.Laplace('a', mu=b0[0], b=tau_coef[0])
    b = pm.Laplace('b', mu=b0[1], b=tau_coef[1])
    y_hat = a + b * x
    likelihood = pm.Normal('y', mu=y_hat, sd=sigma, observed=y)

#%% Sampling from the posterior distribution
n_draws = 5000
n_chains = 4
n_tune = 1000
with regression_laplace_halfcauchy:
    trace = pm.sample(draws=n_draws, chains=n_chains, tune=n_tune,
                      random_seed=123)
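#%% Posterior summary (a small follow-up sketch, not in the original script)
print(pm.summary(trace))
pm.traceplot(trace)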
# seems to fit the empirical data the best.
logistic_dist = getattr(sp.stats, 'logistic')
logistic_param = logistic_dist.fit(y)  # mu, shape

# Logistic distribution does not account for skew, so we will fit a lognormal distribution also.
lognorm_dist = getattr(sp.stats, 'lognorm')
lognorm_param = lognorm_dist.fit(y)  # shape, loc, scale

# Find the SD of SD each week
sd = [0] * len(y)
for i in range(0, len(y) - 1, 5):
    sd[i:i + 5] = [np.std(y[i:i + 5])] * 5
sd_sd = np.std(sd)

# The Ergodic Theorem for Markov Chains allows us to find the posterior distribution
# by simulating a large sample size with Monte Carlo
with pm.Model() as gs_model:
    # Assume returns follow a log-normal distribution, a common assumption for stock
    # returns because it can account for the skew;
    # semi-informed with the SD of the fitted lognormal likelihood estimation
    mu = pm.Lognormal('mu', sigma=lognorm_param[0])

    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
    sigma = PositiveNormal('sigma', mu=np.std(y), sigma=sd_sd)

    # Assume prior returns follow a Gaussian random walk because stock returns are
    # nonstationary, so this helps model the stochastic process;
    # semi-informed with the SD likelihood estimation
    returns = pm.GaussianRandomWalk('returns', mu=mu, sigma=sigma, shape=len(y))

    # Assume shape follows a positive normal distribution centered around the prior shape
# %run notebook_setup
# -

# # Citing exoplanet & its dependencies
#
# The *exoplanet* package is mostly just glue that connects many other ideas and software.
# In a situation like this, it can be easy to forget about the important infrastructure upon which our science is built.
# In order to make sure that you can easily give credit where credit is due, we have tried to make it as painless as possible to work out which citations are expected for a model fit using *exoplanet* by including a :func:`exoplanet.citations.get_citations_for_model` function that introspects the current PyMC3 model and constructs a list of citations for the functions used in that model.
#
# For example, you might compute a quadratically limb darkened light curve using `starry` (via the :class:`exoplanet.LimbDarkLightCurve` class):

# +
import pymc3 as pm

import exoplanet as xo

with pm.Model() as model:
    u = xo.distributions.QuadLimbDark("u")
    orbit = xo.orbits.KeplerianOrbit(period=10.0)
    light_curve = xo.LimbDarkLightCurve(u)
    transit = light_curve.get_light_curve(r=0.1, orbit=orbit, t=[0.0, 0.1])

    txt, bib = xo.citations.get_citations_for_model()
# -

# The :func:`exoplanet.citations.get_citations_for_model` function would generate an acknowledgement that cites:
#
# * [PyMC3](https://docs.pymc.io/#citing-pymc3): for the inference engine and modeling framework,
# * [Theano](http://deeplearning.net/software/theano/citation.html): for the numerical infrastructure,
# * [AstroPy](http://www.astropy.org/acknowledging.html): for units and constants,
# * [Kipping (2013)](https://arxiv.org/abs/1308.0009): for the reparameterization of the limb darkening parameters for a quadratic law, and
# * [Luger, et al. (2018)](https://arxiv.org/abs/1810.06559): for the light curve calculation.
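# A small illustrative follow-up (our addition, not part of the original tutorial text):
# print the acknowledgement paragraph and save the BibTeX entries returned above.

# +
print(txt)
with open("exoplanet_citations.bib", "w") as f:
    f.write(bib)
# -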
@author: k20087271
"""

import pymc3 as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt
import theano
import seaborn as sns
import theano.tensor as t

RANDOM_SEED = 58
pi = np.pi

ACB_model = pm.Model()

L = 295
E = 0.6
m31 = 2.6 * 10**(-3)
delta31 = 1.27 * L * m31 / E
# we set M21 = 0

if __name__ == '__main__':
    with ACB_model:
        # priors for unknown model parameters:
        theta12 = pm.Uniform("theta12", 0, pi / 2.)  # alternative: mu = 0.57, sigma = 0.1
        theta13 = pm.Uniform("theta13", 0, pi / 2.)  # alternative: mu = 0.82, sigma = 0.1
        theta23 = pm.Uniform("theta23", 0, pi / 2.)  # alternative: mu = 0.14, sigma = 0.1
def sleep_mode_use():
    # Sort by time offset
    sleep_data.sort_values('time_offset', inplace=True)
    # Extract the time offsets
    time = np.array(sleep_data.loc[:, 'time_offset'])
    # The observations are the indicator column
    sleep_obs = np.array(sleep_data.loc[:, 'indicator'])

    with pm.Model() as sleep_model:
        # Create the priors for alpha and beta
        alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
        beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)

        # Create a deterministic variable for the logistic function
        p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))

        # Create the Bernoulli likelihood conditioned on the observed data
        pm.Bernoulli('obs', p, observed=sleep_obs)

        # Use Metropolis-Hastings sampling
        step = pm.Metropolis()

        # Sample from the posterior: alpha and beta draws come from MH sampling;
        # sleep_trace stores all parameter values generated by the model, and the
        # step variable selects the specific algorithm.
        sleep_trace = pm.sample(N_SAMPLES, step=step)

    # Extract the alpha and beta samples
    alpha_samples = sleep_trace["alpha"][100:, None]
    beta_samples = sleep_trace["beta"][100:, None]

    figsize(13, 6)

    # ----------------------------------- Visualize the alpha and beta sample distributions -----------------------------------
    plt.subplot(211)
    plt.title(r"""Distribution of $\alpha$ from %d samples""" % N_SAMPLES)
    plt.hist(alpha_samples, histtype='stepfilled',
             color='darkred', bins=30, alpha=0.8, density=True)
    plt.ylabel('Probability density')
    plt.show()

    plt.subplot(212)
    plt.title(r"""Distribution of $\beta$ from %d samples""" % N_SAMPLES)
    plt.hist(beta_samples, histtype='stepfilled',
             color='darkblue', bins=30, alpha=0.8, density=True)
    plt.ylabel('Probability density')
    plt.show()

    # ----------------------------------- Sleep probability distribution from the samples -----------------------------------
    # Set the time range over which to predict probabilities
    time_est = np.linspace(time.min() - 15, time.max() + 15, int(1e3))[:, None]

    # Take the means of the parameters
    alpha_est = alpha_samples.mean()
    beta_est = beta_samples.mean()

    # Probability curve generated with the parameter means
    sleep_est = logistic(time_est, beta_est, alpha_est)

    plt.plot(time_est, sleep_est, color='navy', lw=3, label="Most likely logistic model")
    plt.scatter(time, sleep_obs, edgecolor='slateblue', s=50, alpha=0.2, label='Observations')
    plt.title('Sleep probability distribution from %d samples' % N_SAMPLES)
    plt.legend(prop={'size': 18})
    plt.ylabel('Probability')
    plt.xlabel('PM time')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)
    plt.show()

    print('The probability of sleep exceeds 50% at 10:{} PM.'.format(
        int(time_est[np.where(sleep_est > 0.5)[0][0]][0])))

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list("BMH", colors)

    figsize(12, 6)
    probs = sleep_trace['p']

    plt.scatter(time, probs.mean(axis=0), cmap=cmap, c=probs.mean(axis=0), s=50)
    plt.title('Probability of sleep as a function of time')
    plt.xlabel('PM time')
    plt.ylabel('Probability')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)

    print('Probability of sleep at 22:00: {:.2f}%.'.format(100 * logistic(0, beta_est, alpha_est)))
    print('Probability of sleep at 21:30: {:.2f}%.'.format(100 * logistic(-30, beta_est, alpha_est)))
    print('Probability of sleep at 22:30: {:.2f}%.'.format(100 * logistic(30, beta_est, alpha_est)))

    # ---------------------------------------- Credible intervals for beta and alpha ---------------------------------------------------------------------------------
    sleep_all_est = logistic(time_est.T, beta_samples, alpha_samples)
    quantiles = stats.mstats.mquantiles(sleep_all_est, [0.025, 0.975], axis=0)

    plt.fill_between(time_est[:, 0], *quantiles, alpha=0.6,
                     color='slateblue', label='95% credible interval')
    plt.plot(time_est, sleep_est, lw=2, ls='--',
             color='black', label="Average posterior probability of sleep")
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)
    plt.scatter(time, sleep_obs, edgecolor='skyblue', s=50, alpha=0.1)
    plt.legend(prop={'size': 14})
    plt.xlabel('PM Time')
    plt.ylabel('Probability')
    plt.title('95% credible interval of the posterior probability')
    plt.show()

    # ----------------------------- Posterior probability distribution at specific times ------------------------------------------------------
    def sleep_posterior(time_offset, time):
        # Posterior probability distribution at a specific time
        figsize(16, 8)
        prob = logistic(time_offset, beta_samples, alpha_samples)
        plt.hist(prob, bins=100, histtype='step', lw=4)
        plt.title('Probability distribution of sleep at %s' % time)
        plt.xlabel('Probability of sleep')
        plt.ylabel('Number of samples')
        plt.show()

    sleep_posterior(0, '22:00')
    sleep_posterior(-30, '21:30')

    print('Estimated alpha parameter: {:.6f}.'.format(alpha_est))
    print('Estimated beta parameter: {:.6f}.'.format(beta_est))

    # -------------------------------- Check whether the MCMC sampler has converged -------------------------------------------------------------------------
    # ------------------ Trace plots --------------
    figsize(12, 6)
    plt.subplot(211)
    plt.title(r'Trace of $\alpha$')
    plt.plot(alpha_samples, color='darkred')
    plt.xlabel('Sample number')
    plt.ylabel('Parameter value')
    plt.show()

    plt.subplot(212)
    plt.title(r'Trace of $\beta$')
    plt.plot(beta_samples, color='b')
    plt.xlabel('Sample number')
    plt.ylabel('Parameter value')
    plt.tight_layout(h_pad=0.8)
    plt.show()
plt.scatter(mus[0, 0], mus[0, 1], c='r', s=100)
plt.scatter(mus[1, 0], mus[1, 1], c='b', s=100)
plt.scatter(mus[2, 0], mus[2, 1], c='y', s=100)

# Then, ellipses
plot_ellipse(ax, mus, sigmas)

ax.axis('equal')
plt.show()

## Build model and sample

# Number of iterations for sampler
draws = 2000

# Prepare lists of starting points for mu to prevent label-switching problem
testvals = [[-2, -2], [0, 0], [2, 2]]

# Model structure
with pm.Model() as mvgmm:
    # Prior over component weights
    p = pm.Dirichlet('p', a=np.array([1.] * K))

    # Prior over component means
    mus = [
        pm.MvNormal('mu_%d' % i,
                    mu=pm.floatX(np.zeros(D)),
                    tau=pm.floatX(0.1 * np.eye(D)),
                    shape=(D, ),
                    testval=pm.floatX(testvals[i]))
        for i in range(K)
    ]

    # Cholesky decomposed LKJ prior over component covariance matrices
    packed_L = [
        pm.LKJCholeskyCov('packed_L_%d' % i,
def sleep_time_mode_use():
    raw_data = pd.read_csv(
        'D:/weChatFile/WeChat Files/wxid_fg4c7ci7wpud21/FileStorage/File/2021-04/sleep_wake.csv'
    )
    raw_data['length'] = 8 - (raw_data['Sleep'] / 60) + (raw_data['Wake'] / 60)
    duration = raw_data['length']

    # ----------------------------- Sleep duration -------------------------------------------------------------
    figsize(10, 8)
    plt.hist(duration, bins=20, color='darkred')
    plt.xlabel('Hours')
    plt.title('Distribution of sleep duration')
    plt.ylabel('Observations')
    plt.show()

    # --------------------------- Right-skewed probability density of sleep duration ----------------------------------------
    a = 3
    fig, ax = plt.subplots(1, 1)
    x = np.linspace(6, 12, int(1e3))

    figsize(10, 8)
    plt.hist(duration, bins=20, color='darkred', density=1, stacked=True)
    plt.xlabel('Hours')
    plt.title('Right-skewed probability density (PDF) of sleep duration')
    plt.ylabel('Observations')
    plt.plot(x, stats.skewnorm.pdf(x, a, loc=7.4, scale=1),
             'r-', lw=3, label='skewnorm pdf')
    plt.show()

    # ------------------------------ Probabilistic model of sleep duration --------------------------------------------
    with pm.Model() as duration_model:
        # Define priors for the three parameters; a skewness parameter alpha_skew is added
        alpha_skew = pm.Normal('alpha_skew', mu=0, tau=0.5, testval=3.0)
        mu_ = pm.Normal('mu', mu=0, tau=0.5, testval=7.4)
        tau_ = pm.Normal('tau', mu=0, tau=0.5, testval=1.0)

        # Duration is the observed skew-normal variable
        duration_ = pm.SkewNormal('duration', alpha=alpha_skew, mu=mu_,
                                  sd=1 / tau_, observed=duration)

        # Metropolis-Hastings sampling
        step = pm.Metropolis()
        duration_trace = pm.sample(N_SAMPLES, step=step)

    # -------------------- Extract the most likely parameter estimates ---------------------------------------------------------
    alpha_skew_samples = duration_trace['alpha_skew'][1000:]
    mu_samples = duration_trace['mu'][1000:]
    tau_samples = duration_trace['tau'][1000:]

    alpha_skew_est = alpha_skew_samples.mean()
    mu_est = mu_samples.mean()
    tau_est = tau_samples.mean()

    # ----------------------- Visualize the posterior distribution of sleep duration -------------------------------------------------------
    x = np.linspace(6, 12, 1000)
    y = stats.skewnorm.pdf(x, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est)

    plt.plot(x, y, color='forestgreen')
    plt.fill_between(x, y, color='forestgreen', alpha=0.2)
    plt.xlabel('Hours')
    plt.ylabel('Probability')
    plt.title('Posterior distribution of sleep duration')
    plt.vlines(x=x[np.argmax(y)], ymin=0, ymax=y.max(),
               linestyles='--', linewidth=2, color='red',
               label='Most likely sleep duration')
    plt.show()

    print('The most likely sleep duration is {:.2f} hours.'.format(x[np.argmax(y)]))

    # ----------------------- Query the posterior model --------------------------------------------------------------
    print('Probability of sleeping at least 6.5 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        6.5, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))
    print('Probability of sleeping at least 8 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        8.0, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))
    print('Probability of sleeping at least 9 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        9.0, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))

    # ------------------------- Visualize the posterior and the data -------------------------------------------------------------------------------------
    x = np.linspace(6, 12, 1000)
    y = stats.skewnorm.pdf(x, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est)

    figsize(10, 8)
    # Plot the posterior distribution
    plt.plot(x, y, color='forestgreen', label='Model', lw=3)
    plt.fill_between(x, y, color='forestgreen', alpha=0.2)

    # Plot a histogram of the observations
    plt.hist(duration, bins=10, color='red', alpha=0.8,
             label='Observations', density=1, stacked=True)
    plt.xlabel('Hours')
    plt.ylabel('Probability')
    plt.title('Model')
    plt.vlines(x=x[np.argmax(y)], ymin=0, ymax=y.max(),
               linestyles='--', linewidth=2, color='k',
               label='Most likely sleep duration')
    plt.legend(prop={'size': 12})
    plt.show()
def test_mixed2(self):
    with pm.Model():
        data = np.random.rand(10, 20, 30, 40, 50)
        mb = pm.Minibatch(data, [2, None, 20])
        pm.Normal('n', observed=mb, total_size=(10, None, 30))
def test_raises2(self):
    with pm.Model() as model:
        with pytest.raises(ValueError):
            B = pm.gp.cov.Coregion(1, W=self.W, kappa=self.kappa, B=self.B)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=30, normed=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)
plt.savefig('img203.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)

with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(1000, step=step, start=start)

burnin = 100
chain = trace[burnin:]
pm.traceplot(chain, lines={'theta': theta_real})
plt.savefig('img204.png', dpi=300, figsize=(5.5, 5.5))
plt.figure()

with our_first_model: