Example #1
def test_time_varying_model():

    np.random.seed(1039)

    data = gen_toy_data()

    formula_str = "1 + C(weekday)"
    X_df = patsy.dmatrix(formula_str, data, return_type="dataframe")
    X_np = X_df.values

    xi_shape = X_np.shape[1]

    xi_0_true = np.array([2.0, -2.0, 2.0, -2.0, 2.0, -2.0, 2.0]).reshape(xi_shape, 1)
    xi_1_true = np.array([2.0, -2.0, 2.0, -2.0, 2.0, -2.0, 2.0]).reshape(xi_shape, 1)

    xis_rv_true = np.stack([xi_0_true, xi_1_true], axis=1)

    with pm.Model(**TV_CONFIG) as sim_model:
        _ = create_dirac_zero_hmm(
            X_np, mu=1000, xis=xis_rv_true, observed=np.zeros(X_np.shape[0])
        )

    sim_point = pm.sample_prior_predictive(samples=1, model=sim_model)

    y_t = sim_point["Y_t"].squeeze().astype(int)

    split = int(len(y_t) * 0.7)

    train_y, test_V = y_t[:split], sim_point["V_t"].squeeze()[split:]
    train_X, test_X = X_np[:split, :], X_np[split:, :]

    X = shared(train_X, name="X", borrow=True)
    Y = shared(train_y, name="y_t", borrow=True)

    with pm.Model() as model:
        xis_rv = pm.Normal("xis", 0, 10, shape=xis_rv_true.shape)
        _ = create_dirac_zero_hmm(X, 1000, xis_rv, Y)

    number_of_draws = 500

    with model:
        steps = [
            FFBSStep([model.V_t]),
            pm.NUTS(
                vars=[
                    model.gamma_0,
                    model.Gamma,
                ],
                target_accept=0.90,
            ),
        ]

    with model:
        posterior_trace = pm.sample(
            draws=number_of_draws,
            step=steps,
            random_seed=100,
            return_inferencedata=True,
            chains=1,
            cores=1,
            progressbar=True,
            idata_kwargs={"dims": {"Y_t": ["date"], "V_t": ["date"]}},
        )

    # Update the shared variable values
    Y.set_value(np.ones(test_X.shape[0], dtype=Y.dtype))
    X.set_value(test_X)

    model.V_t.distribution.shape = (test_X.shape[0],)

    hdi_data = az.hdi(posterior_trace, hdi_prob=0.95, var_names=["xis"]).to_dataframe()
    hdi_data = hdi_data.unstack(level="hdi")

    xis_true_flat = xis_rv_true.squeeze().flatten()
    check_idx = ~np.in1d(
        np.arange(len(xis_true_flat)), np.arange(3, len(xis_true_flat), step=4)
    )
    assert np.all(
        xis_true_flat[check_idx] <= hdi_data["xis", "higher"].values[check_idx]
    )
    assert np.all(
        xis_true_flat[check_idx] >= hdi_data["xis", "lower"].values[check_idx]
    )

    trace = posterior_trace.posterior.drop_vars(["Gamma", "V_t"])

    with aesara.config.change_flags(compute_test_value="off"):
        adds_pois_ppc = pm.sample_posterior_predictive(
            trace, var_names=["V_t", "Y_t", "Gamma"], model=model
        )

    assert (np.abs(adds_pois_ppc["V_t"] - test_V) / test_V.shape[0]).mean() < 1e-2
Example #2
def wake_mode_use():
    wake_data.sort_values('time_offset', inplace=True)
    time = np.array(wake_data.loc[:, 'time_offset'])
    wake_obs = np.array(wake_data.loc[:, 'indicator'])
    # ---------------------------------- Wake-data model ---------------------------------
    with pm.Model() as wake_model:
        alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
        beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)
        p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
        # Bernoulli likelihood tying p to the observed wake indicator
        observed = pm.Bernoulli('obs', p, observed=wake_obs)
        step = pm.Metropolis()
        wake_trace = pm.sample(N_SAMPLES, step=step)
    # ------------------------------ Posterior samples (first 100 draws discarded as burn-in) --------------------------------------------------------------------
    alpha_samples = wake_trace["alpha"][100:, None]
    beta_samples = wake_trace["beta"][100:, None]
    time_est = np.linspace(time.min() - 15, time.max() + 15, int(1e3))[:, None]
    alpha_est = alpha_samples.mean()
    beta_est = beta_samples.mean()
    wake_est = logistic(time_est, beta=beta_est, alpha=alpha_est)

    figsize(13, 6)
    plt.plot(time_est, wake_est, color='darkred', lw=3, label="average posterior probability of sleep")
    plt.scatter(time,
                wake_obs,
                edgecolor='r',
                facecolor='r',
                s=50,
                alpha=0.05,
                label='observations')
    plt.title('Posterior probability with %d samples' % N_SAMPLES)
    plt.legend(prop={'size': 14})
    plt.ylabel('Probability')
    plt.xlabel('AM time')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], wake_labels)
    plt.show()

    print('The probability of being awake exceeds 50% at 6:{} AM'.format(
        int(time_est[np.where(wake_est < 0.5)][0])))

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list("BMH", colors)
    figsize(12, 6)
    probs = wake_trace['p']
    # ------------------------------ Probability of sleep by AM time --------------------------------------------------------------------
    plt.scatter(time,
                probs.mean(axis=0),
                cmap=cmap,
                c=probs.mean(axis=0),
                s=50)
    plt.title('Probability of sleep by AM time')
    plt.xlabel('AM time')
    plt.ylabel('Probability')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], wake_labels)
    plt.show()

    print('Probability of being awake at 5:30 AM: {:.2f}%.'.format(
        100 - (100 * logistic(-30, beta=beta_est, alpha=alpha_est))))
    print('Probability of being awake at 6:00 AM: {:.2f}%.'.format(
        100 - (100 * logistic(0, beta=beta_est, alpha=alpha_est))))
    print('Probability of being awake at 6:30 AM: {:.2f}%.'.format(
        100 - (100 * logistic(30, beta=beta_est, alpha=alpha_est))))
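
Examples #2 and #25 both call a `logistic` helper that the excerpts never define. A minimal sketch of what it presumably looks like, matching the parameterization p = 1 / (1 + exp(beta * t + alpha)) used in the models above (the exact signature is an assumption):

import numpy as np

def logistic(x, beta, alpha=0):
    # Hypothetical reconstruction of the missing helper: a sigmoid in the same
    # parameterization as the PyMC3 models. np.dot lets it accept either
    # scalars or (samples, 1) arrays against a (1, times) grid.
    return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))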
Example #3
def test_stable(self):
    X = np.random.uniform(low=320., high=400., size=[2000, 2])
    with pm.Model() as model:
        cov = pm.gp.cov.ExpQuad(2, 0.1)
    dists = theano.function([], cov.square_dist(X, X))()
    assert not np.any(dists < 0)
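
For context: with inputs in the 320-400 range, computing squared distances via the expanded form x·x - 2x·y + y·y can return tiny negative values through floating-point cancellation, which is what this test guards against. A small pure-Python illustration (not part of the test suite):

a, b = 400.0, 400.0 + 1e-8
naive = a * a - 2 * a * b + b * b   # expanded form, as vectorized kernels compute it
exact = (a - b) ** 2                # algebraically identical, numerically stable
print(naive, exact)                 # naive can come out slightly negative; exact is ~1e-16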
Example #4
#     start = pm.find_MAP()
#     # step = pm.Metropolis()
#     trace2 = pm.sample(4000, start=start)
# chain2 = trace2
# varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
# pm.traceplot(chain2, varnames1)
# plt.show()
#
# # Plot the autocorrelation curves
# pm.autocorrplot(chain2)
# plt.show()

# ======================================================================
# After switching to a Weibull distribution, the parameters still need tuning
# # partial_model: partially pooled model
with pm.Model() as mulpartial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 20)
    # sigma = pm.Normal('sigma', 0, 20)
    # nu = pm.Exponential('nu', 1/30)
    # mu_a = pm.Uniform('mu_a', -10, 10)
    # sigma_a = pm.HalfNormal('sigma_a', sd=10)
    # mu_a = pm.Uniform('mu_a', -10, 10)
    # sigma_a = pm.HalfNormal('sigma_a', sd=100)

    beta = pm.Normal('beta', 0, 100, shape=companiesABC)
    beta1 = pm.Normal('beta1', 0, 20)
    beta2 = pm.Normal('beta2', 0, 100)
    beta3 = pm.Normal('beta3', 0, 20)
    # beta4 = pm.Normal('beta4', 0, 20)
Example #5
sig2N = 10
sig2P = 25
muP = 0

# Predictor variable

Sest = np.zeros(N)
Sreal = np.zeros(N)
Xall = np.zeros(N)

for ii in range(N):
    X1 = np.random.rand(1) * 10 - 5
    lamb = 3 * np.exp(-(X1 - phi)**2 / (2.0 * sig2N))
    R = np.random.poisson(lamb, size=(n_input)).astype('float32')

    basic_model = pm.Model()

    with basic_model:

        # Priors for unknown model parameters
        alpha = pm.Normal('alpha', mu=muP, sd=sig2P)

        # Expected value of outcome
        mu = 3 * np.exp(-(alpha - phi)**2 / (2.0 * sig2N))

        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Poisson('Y_obs', mu=mu, observed=R)

    map_estimate = pm.find_MAP(model=basic_model)
    a = np.ones(n_input) / sig2N
    e = phi / sig2N
Example #6
# 50%      52.760000
# 75%      54.595000
# max      57.480000

data.describe()
# count    48.000000
# mean     53.496458
# std       3.456198
# min      47.720000
# 25%      51.582500
# 50%      52.875000
# 75%      54.960000
# max      68.580000

# normal
with pm.Model() as model_g:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1000)

# students t
with pm.Model() as model_t:
    mu = pm.Uniform('mu', 40, 70)
    sigma = pm.HalfNormal('sigma', sd=10)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=v, observed=data)
    trace_t = pm.sample(1000)

data2 = Series(data, copy=True)
data2[48] = 65
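
The injected outlier in `data2` is what motivates the Student-t model: refitting both likelihoods on `data2` and comparing the posteriors shows the effect. A sketch of that follow-up (assumed, not part of the excerpt):

with pm.Model() as model_t2:
    mu = pm.Uniform('mu', 40, 70)
    sigma = pm.HalfNormal('sigma', sd=10)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu, sd=sigma, nu=v, observed=data2)
    trace_t2 = pm.sample(1000)

# With its heavy tails the Student-t likelihood absorbs the outlier, so the
# posterior of mu barely moves; refitting model_g on data2 instead drags mu
# upward and inflates sigma.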
Example #7
# prepare data

milk = pd.read_csv('Data/milk.csv', sep=';')
milk.shape
d = milk.dropna().copy()
d.shape
d.columns
d['neocortex'] = d['neocortex.perc']/100
d[['neocortex', 'neocortex.perc']]
d['lmass'] = np.log(d['mass'])

# fit models
d['kcal.per.g'].describe()
#m6_11 = pm.Model()
with pm.Model() as m6_11:
    alpha = pm.Uniform('alpha', 0, 5)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma), observed=d['kcal.per.g'])

pm.find_MAP(model=m6_11, method='BFGS')

with m6_11:
    trace = pm.sample(2000, return_inferencedata=True, chains=2)
pm.summary(trace)
az.summary(trace)
#pm.gelman_rubin(trace)
with m6_11:
    az.plot_trace(trace)
plt.show()
Example #8
def test_discrete_continuous(self):
    with pm.Model() as model:
        a = pm.Poisson("a", 5)
        b = pm.HalfNormal("b", 10)
        y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
        trace = pm.sample_smc()
Example #9
    #  generate some data

    y = np.zeros((len(timerange)))
    for i in range(len(timerange)):
        y[i] = alpha + beta * timerange[i]**2 + np.random.rand()

    y = [y, y + np.random.rand(len(y))]

    def sys_model(alpha, beta):

        return alpha + beta * timerange**2

    #plt.figure()
    #plt.scatter(timerange, y)

    time_varying_model = pm.Model()

    with time_varying_model:

        #  Set up priors
        alpha = pm.Normal("alpha", mu=0, sigma=10)
        beta = pm.Normal("beta", mu=0, sigma=10)
        sigma = pm.HalfNormal("sigma", sigma=1)

        #  System model
        mu = sys_model(alpha, beta)

        #  Likelihood of observations
        Y = pm.Normal("y", mu=mu, sigma=sigma, observed=y)

        # Sampler to use
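        # The excerpt ends at the comment above; a typical continuation
        # (an assumption, not part of the original) would be:
        # trace = pm.sample(2000, tune=1000)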
Example #10

V_obs2 = V_obs2 - V_gas2

M_R_bulge = []
for i in Radial_distance:
    M_R_bulge.append(simpsons_integration(0.0001, i, 5000, Bulge) / i)
M_R_bulge = np.array(M_R_bulge)

M_R_disk = []
for i in Radial_distance:
    M_R_disk.append(simpsons_integration(0.0001, i, 5000, Disc) / i)
M_R_disk = np.array(M_R_disk)

total_model = pm.Model()

with total_model:

    # priors
    sigma = pm.HalfNormal("sigma", sigma=0.4)
    gamma = pm.Gamma("gamma", alpha=3, beta=1)
    ah = pm.Gamma("ah", alpha=3, beta=1)
    Mh = pm.Gamma("Mh", alpha=3, beta=1)
    M_by_L_bulge = pm.Gamma("M_by_L_bulge", alpha=3, beta=1)
    M_by_L_disk = pm.Gamma("M_by_L_disc", alpha=3, beta=1)

    bulge_rot = M_by_L_bulge * M_R_bulge
    disk_rot = M_by_L_disk * M_R_disk
    halo_rot = (Mh * Radial_distance**(gamma - 1)) / ((ah**gamma) * (1 + (Radial_distance / ah)**(gamma - 1)))
    total_rot = bulge_rot + disk_rot + halo_rot
Example #11
import matplotlib.pyplot as plt
import pymc3 as pm
import numpy as np

# import pydevd
# pydevd.set_pm_excepthook()
np.seterr(invalid='raise')

data = np.random.normal(size=(2, 20))

model = pm.Model()

with model:
    x = pm.Normal('x', mu=.5, tau=2.**-2, shape=(2, 1))
    z = pm.Beta('z', alpha=10, beta=5.5)
    d = pm.Normal('data', mu=x, tau=.75**-2, observed=data)
    step = pm.NUTS()


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n, step)

    plt.subplot(2, 2, 1)
    plt.plot(trace[x][:, 0, 0])
    plt.subplot(2, 2, 2)
    plt.hist(trace[x][:, 0, 0])

    plt.subplot(2, 2, 3)
    plt.plot(trace[x][:, 1, 0])
    plt.subplot(2, 2, 4)
    plt.hist(trace[x][:, 1, 0])
    plt.show()

Example #12

def scatter_plot(x, y):
    plt.figure(figsize=(10, 10))
    for idx, x_i in enumerate(x):
        plt.subplot(2, 2, idx + 1)
        plt.scatter(x_i, y)
        plt.xlabel("$x_{}$".format(idx))
        plt.ylabel("$y$", rotation=0)

    plt.subplot(2, 2, idx + 2)
    plt.scatter(x[0], x[1])
    plt.xlabel("$x_{}$".format(idx - 1))
    plt.ylabel("$x_{}$".format(idx), rotation=0)

X = np.vstack((x_0, x_1))
scatter_plot(X, y)
plt.savefig("masking_effect_variables_data.png")
plt.close()

with pm.Model() as model_ma:
    alpha = pm.Normal("alpha", mu=0, sd=10)
    beta = pm.Normal("beta", mu=0, sd=10, shape=2)
    epsilon = pm.HalfCauchy("epsilon", 5)

    mu = alpha + pm.math.dot(beta, X)

    y_pred = pm.Normal("y_pred", mu=mu, sd=epsilon, observed=y)

    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace_ma = pm.sample(5000, step=step, start=start)

pm.traceplot(trace_ma)
plt.savefig("masking_effect_variables_traceplot.png")
plt.close()
Example #13
for g in range(12, n_genes):
    plt.subplot(2, 3, g - 11)
    plt.plot(times, np.nanmean(replicatsCRTG[c, :, :, g], axis=0), label='measures', marker='x')
    plt.plot(times, theory[c, :, g, 0], label='model predicted, of which')
    plt.plot(times, theory1[c, :, g, 0], label='1. own contribution')
    plt.plot(times, theory2[c, :, g, 0], label='2. production due to TF')
    plt.step(times, piecewiseeta[c, :, 0, -1], where='post', label='TF activity')
    plt.title('gene' + str(genes_numbers[g]))
    plt.legend()
    plt.grid(True)
plt.savefig(figure2_title + '.png')
plt.show()
"""

# 4th step: find better values of the parameters
with pm.Model() as Basic_model:

    # Priors for unknown model parameters
    alpha = pm.Uniform('alpha',
                       lower=0.2 * alpha0,
                       upper=2. * alpha0,
                       shape=n_genes_under_TF_control)
    #    alpha = pm.Gamma('alpha', mu = alpha0, sigma = alpha0, shape = n_genes_under_TF_control)
    beta = pm.Uniform('beta',
                      lower=0.2 * beta0,
                      upper=2. * beta0,
                      shape=n_genes_under_TF_control)
    #    beta  = pm.Gamma('beta', mu = beta0, sigma = beta0, shape = n_genes_under_TF_control)
    gamma = pm.Uniform('gamma',
                       lower=0.2 * gamma0,
                       upper=2. * gamma0,
Example #14
import numpy as np
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns

iris = sns.load_dataset('iris')
df = iris.query("species == ('setosa', 'versicolor')")
y_0 = pd.Categorical(df['species']).codes
x_n = 'sepal_length'
x_0 = df[x_n].values
y_0 = np.concatenate((y_0, np.ones(6)))
x_0 = np.concatenate((x_0, [4.2, 4.5, 4.0, 4.3, 4.2, 4.4]))
x_0_m = x_0 - x_0.mean()
plt.plot(x_0, y_0, 'o', color='k')
plt.show()

with pm.Model() as model_rlg:
    alpha_tmp = pm.Normal('alpha_tmp', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=10)

    mu = alpha_tmp + beta * x_0_m
    theta = pm.Deterministic('theta', 1 / (1 + pm.math.exp(-mu)))

    pi = pm.Beta('pi', 1, 1)
    p = pi * 0.5 + (1 - pi) * theta

    alpha = pm.Deterministic('alpha', alpha_tmp - beta * x_0.mean())
    bd = pm.Deterministic('bd', -alpha / beta)

    yl = pm.Bernoulli('yl', p=p, observed=y_0)

    trace_rlg = pm.sample(2000, start=pm.find_MAP())
Example #15
plt.figure()

data = np.array([
    51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65, 51.49,
    51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48, 57.44, 55.14,
    53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73, 51.94, 54.95, 50.39,
    52.91, 51.50, 52.68, 47.72, 49.73, 51.82, 54.99, 52.84, 53.19, 54.52,
    51.46, 53.73, 51.61, 49.81, 52.42, 54.30, 53.84, 53.16
])

sns.kdeplot(data)
plt.savefig('img302.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

with pm.Model() as model_g:
    mu = pm.Uniform('mu', 40, 75)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1100, njobs=1)

chain_g = trace_g[100:]
pm.traceplot(chain_g)
plt.savefig('img304.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

df = pm.summary(chain_g)

y_pred = pm.sample_ppc(chain_g, 100, model_g, size=len(data))
sns.kdeplot(data, c='b')
Example #16
    def fit(self,
            X,
            y,
            y_error=1,
            x_error=None,
            *,
            sample_kwargs={
                'draws': 1000,
                'target_accept': 0.9
            }):

        kwds = {}
        if self.kwds is not None:
            kwds.update(self.kwds)
        kwds['fit_intercept'] = False
        model = self._choose_regressor()
        self.clf_ = model(**kwds)

        self.fit_intercept = False

        if x_error is not None:
            x_error = np.atleast_2d(x_error)
        with pm.Model():
            # slope and intercept of eta-ksi relation
            slope = pm.Flat('slope', shape=(X.shape[0], ))
            inter = pm.Flat('inter')

            # intrinsic scatter of eta-ksi relation
            int_std = pm.HalfFlat('int_std')
            # standard deviation of Gaussian that ksi are drawn from (assumed mean zero)
            tau = pm.HalfFlat('tau', shape=(X.shape[0], ))
            # intrinsic ksi
            mu = pm.Normal('mu', mu=0, sigma=tau, shape=(X.shape[0], ))

            # Some wizardry with the dimensions all around.
            ksi = pm.Normal('ksi', mu=mu, tau=tau, shape=X.T.shape)

            # intrinsic eta-ksi linear relation + intrinsic scatter
            eta = pm.Normal('eta',
                            mu=(tt.dot(slope.T, ksi.T) + inter),
                            sigma=int_std,
                            shape=y.shape)

            # observed xi, yi
            x = pm.Normal('xi',
                          mu=ksi.T,
                          sigma=x_error,
                          observed=X,
                          shape=X.shape)
            y = pm.Normal('yi',
                          mu=eta,
                          sigma=y_error,
                          observed=y,
                          shape=y.shape)

            self.trace = pm.sample(**sample_kwargs)

            # TODO big: make it optional to choose a way to define best

            # TODO quick: use np.histogramdd
            H2D, bins1, bins2 = np.histogram2d(self.trace['slope'][:, 0],
                                               self.trace['inter'],
                                               bins=50)

            w = np.where(H2D == H2D.max())

            # choose the maximum posterior slope and intercept
            slope_best = bins1[w[0][0]]
            intercept_best = bins2[w[1][0]]
            self.clf_.coef_ = np.array([intercept_best, slope_best])

        return self
Example #17

def MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, output_folder):
    # This is data preprocessing part
    n = np.shape(DataField)[0] # number of measured data
    m = np.shape(DataComp)[0] # number of simulation data

    p = np.shape(DataField)[1] - n_y # number of input x
    q = np.shape(DataComp)[1] - p - n_y # number of calibration parameters t

    xc = DataComp[:,n_y:] # simulation input x + calibration parameters t
    xf = DataField[:,n_y:] # observed input

    yc = DataComp[:,:n_y] # simulation output
    yf = DataField[:,:n_y] # observed output

    x_pred = DataPred[:,n_y:] # design points for predictions
    y_true = DataPred[:,:n_y] # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0] # number of predictions
    N = n+m+n_pred

    # Put points xc, xf, and x_pred on [0,1] 
    for i in range(p):
        x_min = min(min(xc[:,i]),min(xf[:,i]))
        x_max = max(max(xc[:,i]),max(xf[:,i]))
        xc[:,i] = (xc[:,i]-x_min)/(x_max-x_min)
        xf[:,i] = (xf[:,i]-x_min)/(x_max-x_min)
        x_pred[:,i] = (x_pred[:,i]-x_min)/(x_max-x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p,(p+q)):
        t_min = min(xc[:,i])
        t_max = max(xc[:,i])
        xc[:,i] = (xc[:,i]-t_min)/(t_max-t_min)

    # store mean and std of yc for future scale back use
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:,i])
        yc_sd[i] = np.std(yc[:,i])
        yc[:,i] = (yc[:,i]-yc_mean[i])/yc_sd[i]
        yf[:,i] = (yf[:,i]-yc_mean[i])/yc_sd[i]

    # This is modeling part
    with pm.Model() as model:
        # Specify the priors
        eta1 = pm.HalfCauchy("eta1", beta=5) # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p+q)) # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q) # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5) # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred,n_y)) # for y prediction

        # Setup prior of right cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2,sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate the inputs into one big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n,q]), tf)], axis = 1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred,q]), tf)], axis = 1)
        X = tt.concatenate([xf1, xc, x_pred1], axis = 0)
        # Concatenate the outputs into one big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis = 0)

        # Covariance function of the Gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p+q), ls=lengthscale)
        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func=cov_z)

        # Bayesian inference
        matrix_shape = [n+m+n_pred,n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol, noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250,cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y'+str(j+1)+'_pred'+str(i+1))
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(500,n_pred*n_y),columns=name_columns)

    # Draw the cvrmse distribution and calculate the index
    for i in range(n_y):
        index = list(range(0+i,n_pred*n_y+i,n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:,index])
        y_prediction1 = y_prediction1*yc_sd[i]+yc_mean[i] # Scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred'+str(i+1)+'.csv') # Store y_prediction

        # Calculate the distribution of cvrmse
        cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction1-y_true[:,i]),axis=1)/n_pred)/np.mean(y_true[:,i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1,y_true[:,i]).to_csv(output_folder + '/index'+str(i+1)+'.csv')
        # Draw a picture of the cvrmse distribution for each y
        plt.subplot(n_y, 1, i+1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # Draw the prediction plot
    for i in range(n_y):
        index = list(range(0+i,n_pred*n_y+i,n_y))

        y_prediction_mean = np.array(pm.summary(trace)['mean'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index])*yc_sd[i]+yc_mean[i]

        plt.subplot(n_y, 1, i+1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred), y=y_true[:, i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # Print the cvrmse of the posterior mean as a quick sanity check on the outcome
        if i == 0:
            cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction_mean-y_true[:,0]))/len(y_prediction_mean-y_true[:,0]))/np.mean(y_true[:,0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
Example #18
def three_var_model():
    with pm.Model() as model:
        pm.HalfNormal('one', shape=(10, 2), total_size=100)
        pm.Normal('two', shape=(10, ))
        pm.Normal('three', shape=(10, 1, 2))
    return model
Example #19
def lm(x,
       y,
       trace=None,
       credible_interval=0.95,
       ax=None,
       bandalpha=0.6,
       scatter_kws={},
       **kwargs):
    """Make a custom linear model plot with confidence bands.

    Args:
        x (array like): x values
        y (array like): y values
        trace (pymc3.MultiTrace, optional): GLM trace from PyMC3.
        credible_interval (float, optional): Probability mass covered by the credible band. Defaults to 0.95.
        ax (matplotlib.axis, optional): Axis to plot on. Defaults to current axis.
        bandalpha (float, optional): Opacity level of confidence band.
        scatter_kws (dict, optional): Dictionary of keyword arguments passed onto `scatter`.
        **kwargs: Keyword arguments passed onto plot of regression line.

    Returns:
        matplotlib.axis: Axis with the linear model plot.
    """
    if ax is None:
        ax = plt.gca()

    # Determine color (this is necessary so that the scatter and the line have the same color)
    color = next(ax._get_lines.prop_cycler)["color"]

    # Scatter plot of the raw data (uses the module's `scatter` helper)
    ax = scatter(x, y, color=color, ax=ax, **scatter_kws)

    # Run GLM in PyMC3
    if trace is None:
        df = pd.DataFrame(dict(x=x, y=y))
        with pm.Model() as glm:
            pm.GLM.from_formula("y ~ x", data=df)
            trace = pm.sample()

    summary = pm.summary(trace)

    # Plot MAP regression line
    xs = np.linspace(np.min(x), np.max(x), 100)
    intercept = summary.loc["Intercept", "mean"]
    beta = summary.loc["x", "mean"]
    ax.plot(xs, intercept + beta * xs, color=color, zorder=4, **kwargs)

    # Plot posterior predictive credible region band
    intercept_samples = trace.get_values("Intercept")
    beta_samples = trace.get_values("x")
    ypred = intercept_samples + beta_samples * xs[:, None]
    ypred_lower = np.quantile(ypred, (1 - credible_interval) / 2, axis=1)
    ypred_upper = np.quantile(ypred, 1 - (1 - credible_interval) / 2, axis=1)
    ax.fill_between(
        xs,
        ypred_lower,
        ypred_upper,
        color=color,
        zorder=1,
        alpha=bandalpha,
        linewidth=0,
    )

    return ax, trace, summary
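
A hypothetical call with synthetic data (names and values are for illustration only; it assumes the module's `scatter` helper and a PyMC3 installation are available):

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)
x = np.random.normal(size=100)
y = 1.5 + 2.0 * x + np.random.normal(scale=0.5, size=100)

ax, trace, summary = lm(x, y, credible_interval=0.9, lw=2)
plt.show()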
Example #20
    def train(self, fol_path='../data/*'):

        fol_list = glob.glob(fol_path)
        print(fol_list)
        seq_list = []
        for fol in fol_list:
            f_list = glob.glob(fol + '/*.jpg')
            im_list = []
            for f in sorted(f_list):
                # Crop to the ultrasound active area
                im = np.mean(cv2.resize(
                    cv2.imread(f)[180:700, 500:1020, :], (self.w, self.h)),
                             axis=-1)
                im_list.append(im)
            seq_list.append(np.array(im_list))

        # Get latent states
        self.latent_list = []
        for s in seq_list[:-1]:
            self.latent_list.append(
                self.vae_model.encoder.predict(
                    s.reshape(-1, self.w, self.h, 1) / 255.0)[0])
        self.latent = np.vstack(self.latent_list)

        np.savetxt(self.log_path + 'latent.txt', self.latent)

        # Generate training pairs
        print('Generating training pairs')
        G = self.generate_pairs(self.latent_list)
        W = np.arange(self.latent.shape[0]).astype(int)

        Gt = tt.as_tensor(G)
        W = W.astype(int)
        Xt = tt.as_tensor(self.latent)

        with pm.Model() as reward_model:

            l = pm.Gamma("l", alpha=2.0, beta=0.5)

            cov_func = pm.gp.cov.Matern32(self.latent.shape[1], ls=l)

            Xu = pm.gp.util.kmeans_inducing_points(self.Ni, self.latent)

            sig = pm.HalfCauchy("sig",
                                beta=np.ones((self.latent.shape[0], )),
                                shape=self.latent.shape[0])

            gp = pm.gp.MarginalSparse(cov_func=cov_func)

            f = gp.marginal_likelihood('reward',
                                       Xt,
                                       Xu,
                                       shape=self.latent.shape[0],
                                       y=None,
                                       noise=sig,
                                       is_observed=False)

            diff = f[Gt[:, 0]] - f[Gt[:, 1]]

            p = pm.math.sigmoid(diff)

            wl = pm.Bernoulli('observed wl',
                              p=p,
                              observed=np.ones((G.shape[0], )),
                              total_size=self.latent.shape[0])
            inference = pm.ADVI()

            train_probs = inference.approx.sample_node(p)

        train_accuracy = (train_probs > 0.5).mean(-1)
        eval_tracker = pm.callbacks.Tracker(train_accuracy=train_accuracy.eval)
        approx = inference.fit(1000,
                               obj_optimizer=pm.adam(learning_rate=0.1),
                               callbacks=[eval_tracker])

        trace = approx.sample(5000)
        l = np.mean(trace['l'])
        sig = np.mean(trace['sig'])
        reward = np.mean(trace['reward'], axis=0)
        np.savetxt('./logs/l.txt', np.array([l]))
        np.savetxt('./logs/sig.txt', np.array([sig]))
        np.savetxt('./logs/reward.txt', reward)

        print('Saved trained reward parameters')
        return l, sig, reward
Example #21
    FontPath = '/usr/share/fonts/truetype/takao-gothic/TakaoPGothic.ttf'
else:
    print('This Python code does not support the OS you are using.')
    sys.exit()
jpfont = FontProperties(fname=FontPath)
#%% Generate data from the regression model
n = 50
np.random.seed(99)
u = st.norm.rvs(scale=0.7, size=n)
x = st.uniform.rvs(loc=-np.sqrt(3.0), scale=2.0 * np.sqrt(3.0), size=n)
y = 1.0 + 2.0 * x + u
#%% Posterior setup for the regression coefficients and error variance (Laplace + half-Cauchy priors)
b0 = np.zeros(2)
tau_coef = np.ones(2)
tau_sigma = 1.0
regression_laplace_halfcauchy = pm.Model()
with regression_laplace_halfcauchy:
    sigma = pm.HalfCauchy('sigma', beta=tau_sigma)
    a = pm.Laplace('a', mu=b0[0], b=tau_coef[0])
    b = pm.Laplace('b', mu=b0[1], b=tau_coef[1])
    y_hat = a + b * x
    likelihood = pm.Normal('y', mu=y_hat, sd=sigma, observed=y)
#%% Sampling from the posterior
n_draws = 5000
n_chains = 4
n_tune = 1000
with regression_laplace_halfcauchy:
    trace = pm.sample(draws=n_draws,
                      chains=n_chains,
                      tune=n_tune,
                      random_seed=123)
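
A natural follow-up (not shown in the excerpt) is to summarize the posterior draws; a minimal sketch:

print(pm.summary(trace, var_names=['a', 'b', 'sigma']))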
Example #22

# A logistic distribution seems to fit the empirical data the best.
logistic_dist = getattr(sp.stats, 'logistic')
logistic_param = logistic_dist.fit(y) # mu, shape
# Logistic distribution does not account for skew, so we will fit a lognormal distribution also.
lognorm_dist = getattr(sp.stats, 'lognorm')
lognorm_param = lognorm_dist.fit(y) # shape, loc, scale

# Find the SD of SD each week
sd = [0] * len(y)
for i in range(0,len(y)-1,5):
    sd[i:i+5] = [np.std(y[i:i+5])] * 5
sd_sd = np.std(sd)

# The Ergodic Theorem for Markov chains allows us to find the posterior distribution
# by simulating a large sample size with Monte Carlo
with pm.Model() as gs_model:
    
    # Assume returns follow a log-normal distribution, 
    # common assumption for stock returns because it can account for the skew
    # semi-informed with SD of fitted lognorm likelihood estimation
    mu = pm.Lognormal('mu', sigma=lognorm_param[0])
    
    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
    sigma = PositiveNormal('sigma', mu=np.std(y), sigma=sd_sd)
    
    # Assume prior returns follow a Gaussian random walk because stock returns are nonstationary,
    # so this helps model the stochastic process
    # semi-informed with SD likelihood estimation
    returns = pm.GaussianRandomWalk('returns', mu=mu, sigma=sigma, shape=len(y))
    
    # Assume shape follows a positive normal distribution centered around the prior shape
Example #23
# %run notebook_setup
# -

# # Citing exoplanet & its dependencies

# The *exoplanet* package is mostly just glue that connects many other ideas and software.
# In a situation like this, it can be easy to forget about the important infrastructure upon which our science is built.
# In order to make sure that you can easily give credit where credit is due, we have tried to make it as painless as possible to work out which citations are expected for a model fit using *exoplanet* by including a :func:`exoplanet.citations.get_citations_for_model` function that introspects the current PyMC3 model and constructs a list of citations for the functions used in that model.
#
# For example, you might compute a quadratically limb darkened light curve using `starry` (via the :class:`exoplanet.LimbDarkLightCurve` class):

# +
import pymc3 as pm
import exoplanet as xo

with pm.Model() as model:
    u = xo.distributions.QuadLimbDark("u")
    orbit = xo.orbits.KeplerianOrbit(period=10.0)
    light_curve = xo.LimbDarkLightCurve(u)
    transit = light_curve.get_light_curve(r=0.1, orbit=orbit, t=[0.0, 0.1])

    txt, bib = xo.citations.get_citations_for_model()
# -

# The :func:`exoplanet.citations.get_citations_for_model` function would generate an acknowledgement that cites:
#
# * [PyMC3](https://docs.pymc.io/#citing-pymc3): for the inference engine and modeling framework,
# * [Theano](http://deeplearning.net/software/theano/citation.html): for the numerical infrastructure,
# * [AstroPy](http://www.astropy.org/acknowledging.html): for units and constants,
# * [Kipping (2013)](https://arxiv.org/abs/1308.0009): for the reparameterization of the limb darkening parameters for a quadratic law, and
# * [Luger, et al. (2018)](https://arxiv.org/abs/1810.06559): for the light curve calculation.
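
# As a follow-on sketch (assuming the `txt` and `bib` values produced above), the acknowledgement and its BibTeX entries can simply be written out for pasting into a manuscript:

# +
with open("acknowledgement.tex", "w") as f:
    f.write(txt)

with open("exoplanet_citations.bib", "w") as f:
    f.write(bib)
# -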
Example #24
@author: k20087271
"""

import pymc3 as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt
import theano
import seaborn as sns
import theano.tensor as t

RANDOM_SEED = 58

pi = np.pi
ACB_model = pm.Model()  
L = 295
E = 0.6
m31 = 2.6*10**(-3)

delta31 = 1.27*L*m31/E  # we set m21 = 0



if __name__ == '__main__':
    with ACB_model:
        
        #priors for unknown model parameters:
        theta12 = pm.Uniform("theta12",0,pi/2.) #mu = 0.57, sigma = 0.1)        #0,pi/2.)
        theta13 = pm.Uniform("theta13",0,pi/2.) #mu = 0.82, sigma = 0.1)        #0,pi/2.)
        theta23 = pm.Uniform("theta23",0,pi/2.) #mu = 0.14, sigma = 0.1)        #0,pi/2.)
Example #25
def sleep_mode_use():
    # Sort by time offset
    sleep_data.sort_values('time_offset', inplace=True)
    # Extract the time offsets
    time = np.array(sleep_data.loc[:, 'time_offset'])
    # The observations are the indicator column
    sleep_obs = np.array(sleep_data.loc[:, 'indicator'])
    with pm.Model() as sleep_model:
        # Priors for alpha and beta
        alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
        beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)
        # Deterministic variable: the logistic function
        p = pm.Deterministic('p', 1. / (1. + tt.exp(beta * time + alpha)))
        # Bernoulli likelihood for the observed indicator data
        pm.Bernoulli('obs', p, observed=sleep_obs)
        # Metropolis-Hastings sampling
        step = pm.Metropolis()
        # Draw from the posterior: MH sampling yields the alpha and beta samples;
        # sleep_trace stores every parameter value the model generated, and step
        # selects the specific algorithm.
        sleep_trace = pm.sample(N_SAMPLES, step=step)

    # Extract the alpha and beta samples
    alpha_samples = sleep_trace["alpha"][100:, None]
    beta_samples = sleep_trace["beta"][100:, None]

    figsize(13, 6)
    # ----------------------------------- Distributions of the alpha and beta samples -----------------------------------------------------------------
    plt.subplot(211)
    plt.title(r""" %d 个样本的 $\alpha$ 分布""" % N_SAMPLES)

    plt.hist(alpha_samples,
             histtype='stepfilled',
             color='darkred',
             bins=30,
             alpha=0.8,
             density=True)
    plt.ylabel('Probability density')
    plt.show()

    plt.subplot(212)
    plt.title(r""" %d 个样本的 $\beta$ 分布""" % N_SAMPLES)
    plt.hist(beta_samples,
             histtype='stepfilled',
             color='darkblue',
             bins=30,
             alpha=0.8,
             density=True)
    plt.ylabel('Probability density')
    plt.show()
    # ----------------------------------- Sleep probability distribution from the posterior samples -----------------------------------------------------------------
    # Time grid over which to predict probabilities
    time_est = np.linspace(time.min() - 15, time.max() + 15, int(1e3))[:, None]
    # Posterior means of the parameters
    alpha_est = alpha_samples.mean()
    beta_est = beta_samples.mean()
    # Probability curve generated from the mean parameters
    sleep_est = logistic(time_est, beta_est, alpha_est)

    plt.plot(time_est, sleep_est, color='navy', lw=3, label="most likely logistic model")
    plt.scatter(time,
                sleep_obs,
                edgecolor='slateblue',
                s=50,
                alpha=0.2,
                label='actual observations')
    plt.title('Sleep probability distribution with %d samples' % N_SAMPLES)
    plt.legend(prop={'size': 18})
    plt.ylabel('Probability')
    plt.xlabel('PM time')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)
    plt.show()
    print('The probability of sleep first exceeds 50% at 22:{}'.format(
        int(time_est[np.where(sleep_est > 0.5)[0][0]][0])))

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list("BMH", colors)
    figsize(12, 6)
    probs = sleep_trace['p']

    plt.scatter(time,
                probs.mean(axis=0),
                cmap=cmap,
                c=probs.mean(axis=0),
                s=50)
    plt.title('Probability of sleep as a function of time')
    plt.xlabel('PM time')
    plt.ylabel('Probability')
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)

    print('Probability of sleep at 22:00: {:.2f}%.'.format(100 * logistic(0, beta_est, alpha_est)))
    print('Probability of sleep at 21:30: {:.2f}%.'.format(100 * logistic(-30, beta_est, alpha_est)))
    print('Probability of sleep at 22:30: {:.2f}%.'.format(100 * logistic(30, beta_est, alpha_est)))

    # ---------------------------------------- Credible intervals for beta and alpha ---------------------------------------------------------------------------------
    sleep_all_est = logistic(time_est.T, beta_samples, alpha_samples)
    quantiles = stats.mstats.mquantiles(sleep_all_est, [0.025, 0.975], axis=0)

    plt.fill_between(time_est[:, 0],
                     *quantiles,
                     alpha=0.6,
                     color='slateblue',
                     label='95% credible interval')
    plt.plot(time_est,
             sleep_est,
             lw=2,
             ls='--',
             color='black',
             label="睡眠的平均后验概率")
    plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels)
    plt.scatter(time, sleep_obs, edgecolor='skyblue', s=50, alpha=0.1)
    plt.legend(prop={'size': 14})
    plt.xlabel('PM Time')
    plt.ylabel('Probability')
    plt.title('95% credible interval of the posterior probability')
    plt.show()

    # ----------------------------- Posterior probability distribution at specific times ------------------------------------------------------
    def sleep_posterior(time_offset, time):  # posterior distribution of sleep at a given time
        figsize(16, 8)
        prob = logistic(time_offset, beta_samples, alpha_samples)
        plt.hist(prob, bins=100, histtype='step', lw=4)
        plt.title('Probability distribution of sleep at %s' % time)
        plt.xlabel('Probability of sleep')
        plt.ylabel('Number of samples')
        plt.show()

    sleep_posterior(0, '22:00')
    sleep_posterior(-30, '21:30')

    print('Estimated alpha parameter: {:.6f}.'.format(alpha_est))
    print('Estimated beta parameter: {:.6f}.'.format(beta_est))

    # -------------------------------- Check whether the MCMC model has converged -------------------------------------------------------------------------
    # ------------------ Trace plots --------------
    figsize(12, 6)
    plt.subplot(211)
    plt.title(r'Trace of $\alpha$')
    plt.plot(alpha_samples, color='darkred')
    plt.xlabel('Sample')
    plt.ylabel('Parameter value')
    plt.show()

    plt.subplot(212)
    plt.title(r'Trace of $\beta$')
    plt.plot(beta_samples, color='b')
    plt.xlabel('Sample')
    plt.ylabel('Parameter value')
    plt.tight_layout(h_pad=0.8)
    plt.show()
Example #26
plt.scatter(mus[0, 0], mus[0, 1], c='r', s=100)
plt.scatter(mus[1, 0], mus[1, 1], c='b', s=100)
plt.scatter(mus[2, 0], mus[2, 1], c='y', s=100)
# Then, ellipses
plot_ellipse(ax, mus, sigmas)
ax.axis('equal')
plt.show()

## Build model and sample
# Number of iterations for sampler
draws = 2000
# Prepare lists of starting points for mu to prevent label-switching problem
testvals = [[-2, -2], [0, 0], [2, 2]]

# Model structure
with pm.Model() as mvgmm:
    # Prior over component weights
    p = pm.Dirichlet('p', a=np.array([1.] * K))

    # Prior over component means
    mus = [
        pm.MvNormal('mu_%d' % i,
                    mu=pm.floatX(np.zeros(D)),
                    tau=pm.floatX(0.1 * np.eye(D)),
                    shape=(D, ),
                    testval=pm.floatX(testvals[i])) for i in range(K)
    ]

    # Cholesky decomposed LKJ prior over component covariance matrices
    packed_L = [
        pm.LKJCholeskyCov('packed_L_%d' % i,
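
The excerpt is cut off mid-expression; a standard way to finish a Cholesky-decomposed LKJ prior in PyMC3 (an assumed reconstruction, not the original code) is:

    packed_L = [
        pm.LKJCholeskyCov('packed_L_%d' % i, n=D, eta=2.,
                          sd_dist=pm.HalfCauchy.dist(2.5))
        for i in range(K)
    ]
    # Unpack each flat vector into a lower-triangular Cholesky factor
    L = [pm.expand_packed_triangular(D, packed_L[i], lower=True) for i in range(K)]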
Example #27
def sleep_time_mode_use():
    raw_data = pd.read_csv(
        'D:/weChatFile/WeChat Files/wxid_fg4c7ci7wpud21/FileStorage/File/2021-04/sleep_wake.csv'
    )
    raw_data['length'] = 8 - (raw_data['Sleep'] / 60) + (raw_data['Wake'] / 60)
    duration = raw_data['length']
    # ----------------------------- Sleep duration -------------------------------------------------------------
    figsize(10, 8)
    plt.hist(duration, bins=20, color='darkred')
    plt.xlabel('Hours')
    plt.title('Distribution of sleep duration')
    plt.ylabel('Observations')
    plt.show()
    # --------------------------- Right-skewed probability density of sleep duration ----------------------------------------
    a = 3
    fig, ax = plt.subplots(1, 1)
    x = np.linspace(6, 12, int(1e3))

    figsize(10, 8)
    plt.hist(duration, bins=20, color='darkred', density=1, stacked=True)
    plt.xlabel('Hours')
    plt.title('Right-skewed probability density (PDF) of sleep duration')
    plt.ylabel('Observations')
    plt.plot(x,
             stats.skewnorm.pdf(x, a, loc=7.4, scale=1),
             'r-',
             lw=3,
             label='skewnorm pdf')
    plt.show()

    # ------------------------------ Probability model for sleep duration --------------------------------------------
    with pm.Model() as duration_model:
        # Priors for the three parameters; note the extra skewness parameter alpha_skew
        alpha_skew = pm.Normal('alpha_skew', mu=0, tau=0.5, testval=3.0)
        mu_ = pm.Normal('mu', mu=0, tau=0.5, testval=7.4)
        tau_ = pm.Normal('tau', mu=0, tau=0.5, testval=1.0)

        # Skew-normal likelihood for the observed durations
        duration_ = pm.SkewNormal('duration',
                                  alpha=alpha_skew,
                                  mu=mu_,
                                  sd=1 / tau_,
                                  observed=duration)

        # Metropolis-Hastings sampling
        step = pm.Metropolis()
        duration_trace = pm.sample(N_SAMPLES, step=step)
    # -------------------- Extract the most likely parameter estimates ---------------------------------------------------------
    alpha_skew_samples = duration_trace['alpha_skew'][1000:]
    mu_samples = duration_trace['mu'][1000:]
    tau_samples = duration_trace['tau'][1000:]

    alpha_skew_est = alpha_skew_samples.mean()
    mu_est = mu_samples.mean()
    tau_est = tau_samples.mean()
    # ----------------------- Visualize the posterior distribution of sleep duration -------------------------------------------------------
    x = np.linspace(6, 12, 1000)
    y = stats.skewnorm.pdf(x, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est)
    plt.plot(x, y, color='forestgreen')
    plt.fill_between(x, y, color='forestgreen', alpha=0.2)
    plt.xlabel('Hours')
    plt.ylabel('Probability')
    plt.title('Posterior distribution of sleep duration')
    plt.vlines(x=x[np.argmax(y)],
               ymin=0,
               ymax=y.max(),
               linestyles='--',
               linewidth=2,
               color='red',
               label='most likely sleep duration')
    plt.show()
    print('The most likely sleep duration is {:.2f} hours.'.format(x[np.argmax(y)]))
    # ----------------------- Query the posterior model --------------------------------------------------------------
    print('Probability of sleeping at least 6.5 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        6.5, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))
    print('Probability of sleeping at least 8 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        8.0, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))
    print('Probability of sleeping at least 9 hours: {:.2f}%.'.format(100 * (1 - stats.skewnorm.cdf(
        9.0, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est))))
    # ------------------------- Visualize the posterior and the data -------------------------------------------------------------------------------------
    x = np.linspace(6, 12, 1000)
    y = stats.skewnorm.pdf(x, a=alpha_skew_est, loc=mu_est, scale=1 / tau_est)
    figsize(10, 8)
    # Plot the posterior distribution
    plt.plot(x, y, color='forestgreen', label='Model', lw=3)
    plt.fill_between(x, y, color='forestgreen', alpha=0.2)

    # Histogram of the observations
    plt.hist(duration,
             bins=10,
             color='red',
             alpha=0.8,
             label='observations',
             density=1,
             stacked=True)
    plt.xlabel('Hours')
    plt.ylabel('Probability')
    plt.title('Model')
    plt.vlines(x=x[np.argmax(y)],
               ymin=0,
               ymax=y.max(),
               linestyles='--',
               linewidth=2,
               color='k',
               label='most likely sleep duration')
    plt.legend(prop={'size': 12})
    plt.show()
Example #28
def test_mixed2(self):
    with pm.Model():
        data = np.random.rand(10, 20, 30, 40, 50)
        mb = pm.Minibatch(data, [2, None, 20])
        pm.Normal('n', observed=mb, total_size=(10, None, 30))
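
`pm.Minibatch(data, [2, None, 20])` subsamples axes 0 and 2 while keeping axis 1 whole, and `total_size=(10, None, 30)` tells PyMC3 how to rescale the minibatch log-likelihood to the full dataset. A simpler sketch of the same mechanism on a 1-D dataset (shapes chosen for illustration):

import numpy as np
import pymc3 as pm

data = np.random.randn(1000)
with pm.Model():
    mu = pm.Normal('mu', 0, 1)
    # Each step sees 100 rows; total_size rescales logp back to all 1000.
    pm.Normal('obs', mu=mu, sigma=1,
              observed=pm.Minibatch(data, batch_size=100), total_size=1000)
    approx = pm.fit(n=10000)  # minibatching is what makes ADVI scale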
Example #29
def test_raises2(self):
    with pm.Model() as model:
        with pytest.raises(ValueError):
            B = pm.gp.cov.Coregion(1, W=self.W, kappa=self.kappa, B=self.B)
Example #30
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=30, normed=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)
plt.savefig('img203.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)

with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)

    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(1000, step=step, start=start)

burnin = 100
chain = trace[burnin:]
pm.traceplot(chain, lines={'theta': theta_real})
plt.savefig('img204.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

with our_first_model:
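
The excerpt ends inside this final block; in the source text it re-runs the sampler with several chains, roughly along these lines (an assumption, not shown here):

with our_first_model:
    step = pm.Metropolis()
    multi_trace = pm.sample(1000, step=step, njobs=4)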