Example no. 1
    def test_normal_vector(self, caplog):
        with pm.Model() as model:
            mu = pm.Normal("mu", 0.0, 1.0)
            a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_posterior_predictive([model.test_point],
                                                  samples=10)
            ppc = pm.sample_posterior_predictive(trace,
                                                 samples=12,
                                                 var_names=[])
            assert len(ppc) == 0

            # test list input
            ppc0 = pm.fast_sample_posterior_predictive([model.test_point],
                                                       samples=10)
            ppc = pm.fast_sample_posterior_predictive(trace,
                                                      samples=12,
                                                      var_names=[])
            assert len(ppc) == 0

            # test keep_size parameter
            ppc = pm.sample_posterior_predictive(trace, keep_size=True)
            assert ppc["a"].shape == (trace.nchains, len(trace), 2)
            with pytest.warns(UserWarning):
                ppc = pm.sample_posterior_predictive(trace,
                                                     samples=12,
                                                     var_names=["a"])
            assert "a" in ppc
            assert ppc["a"].shape == (12, 2)

            # test keep_size parameter with inference data as input...
            idata = az.from_pymc3(trace)
            ppc = pm.sample_posterior_predictive(idata, keep_size=True)
            assert ppc["a"].shape == (trace.nchains, len(trace), 2)
            with pytest.warns(UserWarning):
                ppc = pm.sample_posterior_predictive(trace,
                                                     samples=12,
                                                     var_names=["a"])
            assert "a" in ppc
            assert ppc["a"].shape == (12, 2)

            # test keep_size parameter
            ppc = pm.fast_sample_posterior_predictive(trace, keep_size=True)
            assert ppc["a"].shape == (trace.nchains, len(trace), 2)
            with pytest.warns(UserWarning):
                ppc = pm.fast_sample_posterior_predictive(trace,
                                                          samples=12,
                                                          var_names=["a"])
            assert "a" in ppc
            assert ppc["a"].shape == (12, 2)

            # test keep_size parameter with inference data as input
            ppc = pm.fast_sample_posterior_predictive(idata, keep_size=True)
            assert ppc["a"].shape == (trace.nchains, len(trace), 2)
            with pytest.warns(UserWarning):
                ppc = pm.fast_sample_posterior_predictive(trace,
                                                          samples=12,
                                                          var_names=["a"])
            assert "a" in ppc
            assert ppc["a"].shape == (12, 2)

            # the `size` argument is unsupported by the fast_ version. [2019/08/19:rpg]
            ppc = pm.sample_posterior_predictive(trace,
                                                 samples=10,
                                                 var_names=["a"],
                                                 size=4)
            assert "a" in ppc
            assert ppc["a"].shape == (10, 4, 2)
Example no. 2
with pm.Model() as model1:
    mu = pm.Normal('mu', mu=0, sd=10, shape=groups)
    sigma = pm.HalfNormal('sigma', sd=10, shape=groups)
    v = pm.Exponential('v', 1 / 30)
    y = pm.StudentT('y', mu=mu[idx], sd=sigma[idx], nu=v, observed=tip)
    trace1 = pm.sample(5000)

# same Student-t model, but now each group gets its own nu, so outlier
# behavior can vary per group
with pm.Model() as model2:
    mu = pm.Normal('mu', mu=0, sd=10, shape=groups)
    sigma = pm.HalfNormal('sigma', sd=10, shape=groups)
    v = pm.Exponential('v', 1 / 30, shape=groups)
    y = pm.StudentT('y', mu=mu[idx], sd=sigma[idx], nu=v[idx], observed=tip)
    trace2 = pm.sample(5000)

y_pred = pm.sample_posterior_predictive(trace, 100, model)
data_ppc = az.from_pymc3(trace=trace, posterior_predictive=y_pred)
ax0 = az.plot_ppc(data_ppc, kind='kde', mean=False)
plt.xlim(-2, 8)

y_pred1 = pm.sample_posterior_predictive(trace1, 100, model1)
data_ppc1 = az.from_pymc3(trace=trace1, posterior_predictive=y_pred1)
az.plot_ppc(data_ppc1, kind='kde', mean=False)
plt.xlim(-2, 8)

# works best by far
y_pred2 = pm.sample_posterior_predictive(trace2, 100, model2)
data_ppc2 = az.from_pymc3(trace=trace2, posterior_predictive=y_pred2)
az.plot_ppc(data_ppc2, kind='kde', mean=False)
plt.xlim(-2, 8)
"""
Compute the probability of superiority directly from the posterior (without
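
# A hedged sketch of that computation (an assumption, not part of the original
# snippet): compare the per-group posterior means sampled above into trace2.
mu_draws = trace2['mu']                       # shape: (n_samples, groups)
prob_superiority = (mu_draws[:, 0] > mu_draws[:, 1]).mean()
print(f"P(mean of group 0 > mean of group 1) = {prob_superiority:.3f}")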
Example no. 3
    def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture):
        pmodel, trace = point_list_arg_bug_fixture
        idat = az.from_pymc3(trace)
        with pmodel:
            pp = pm.sample_posterior_predictive(idat.posterior,
                                                var_names=["d"])
Example no. 4
    a = pm.Normal("a", mu=100, sd=250)
    b = pm.Normal("b", mu=10, sd=250)
    sigma = pm.HalfNormal("sigma", sd=200)
    y_pred = pm.Normal('y_pred',
                       mu=a + b * day_idx,
                       sd=sigma,
                       observed=reactions.Reaction,
                       dims="driver_idx_day")
    ## inference
    trace_p = pm.sample(samples, chains=chains, tune=tune)
    prior_p = pm.sample_prior_predictive(samples=samples)
    posterior_predictive_p = pm.sample_posterior_predictive(trace_p,
                                                            samples=samples)

## STEP 1
## export inference results in ArviZ InferenceData obj
## will also capture all the sampler statistics
data_p = az.from_pymc3(trace=trace_p,
                       prior=prior_p,
                       posterior_predictive=posterior_predictive_p)

## STEP 2
## extract dag
dag_p = get_dag(fullyPooled_model)
## insert dag into sampler stat attributes
data_p.sample_stats.attrs["graph"] = str(dag_p)

## STEP 3
## save data
fileName_p = "reaction_times_pooled"
arviz_to_json(data_p, fileName_p + '.npz')
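
# get_dag and arviz_to_json are not PyMC3/ArviZ API; they appear to come from
# a companion helper package. A hypothetical minimal stand-in that merely
# persists the InferenceData (an assumption about what arviz_to_json does)
# would be:
#     def arviz_to_json(idata, path):
#         idata.to_netcdf(path)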
Example no. 5
# model inference
fileName='coal_mining_disasters_PyMC3'
samples=10000
tune=10000
chains=2
coords = {"year": years}
with pm.Model(coords=coords) as disaster_model:
    switchpoint = pm.DiscreteUniform('switchpoint', lower=years.min(), upper=years.max(), testval=1900)
    early_rate = pm.Exponential('early_rate', 1)
    late_rate = pm.Exponential('late_rate', 1)
    rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)
    disasters = pm.Poisson('disasters', rate, observed=disaster_data, dims='year')
    # inference
    trace = pm.sample(samples, chains=chains, tune=tune)
    prior = pm.sample_prior_predictive(samples=samples)
    posterior_predictive = pm.sample_posterior_predictive(trace, samples=samples)

## STEP 1	
# will also capture all the sampler statistics
data = az.from_pymc3(trace=trace, prior=prior, posterior_predictive=posterior_predictive)

## STEP 2	
# extract dag
dag = get_dag(disaster_model)
# insert dag into sampler stat attributes
data.sample_stats.attrs["graph"] = str(dag)

## STEP 3  
# save data      
arviz_to_json(data, fileName+'.npz')
Example no. 6
def causality_test():
    """ Load csv file to build EDA plots and PyMC models
	"""
    # Load data: https://github.com/grjd/causalityagingbrain/blob/main/dataset_gh.csv
    csv_path = ""
    df = pd.read_csv(csv_path, sep=';')
    dataframe_orig = df.copy()
    plots_and_stuff(df)

    corrmatrix = df.corr(method='pearson')
    mask = np.zeros_like(corrmatrix)
    mask[np.triu_indices_from(mask)] = True
    plt.figure(figsize=(7, 7))
    heatmap = sns.heatmap(corrmatrix,
                          mask=mask,
                          annot=True,
                          center=0,
                          square=True,
                          linewidths=.5)
    #heatmap = sns.heatmap(atrophy_corr,annot=True, center=0,square=True, linewidths=.5)
    heatmap.set_xticklabels(colsofinterest_Eng,
                            rotation=45,
                            fontsize='small',
                            horizontalalignment='right')
    heatmap.set_yticklabels(colsofinterest_Eng,
                            rotation=0,
                            fontsize='small',
                            horizontalalignment='right')
    fig_file = os.path.join(figures_dir, 'heat_CorrChapter.png')
    plt.savefig(fig_file)

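    # `standardize` below is a repo-local helper (an assumption); a plain
    # z-score version would look like:
    #     def standardize(series):
    #         return (series - series.mean()) / series.std()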
    # Standardize regressors and target
    df["brain_std"] = standardize(df["fr_BrainSegVol_to_eTIV_y1"])
    df["age_std"] = standardize(df["edad_visita1"])
    df["cog_std"] = standardize(df["fcsrtlibdem_visita1"])
    # Encode Categorical Variables
    df["school_id"] = pd.Categorical(df["nivel_educativo"]).codes
    df["sex_id"] = pd.Categorical(df["sexo"]).codes

    ################################################################
    ################## SEX (0M, 1F) -> BRAIN #######################
    #################################################################
    with pm.Model() as mXB:
        #sigma = pm.Uniform("sigma", 0, 1)
        sigma = pm.HalfNormal("sigma", sd=1)
        #mu_x = pm.Normal("mu_x", 0.7, 0.3, shape=2)
        mu_x = pm.Normal("mu_x", 0.0, 1.0, shape=2)
        #brain_remained = pm.Normal("brain_remained", mu_x[df["sex_id"]], sigma, observed=df["fr_BrainSegVol_to_eTIV_y1"])
        brain_remained = pm.Normal("brain_remained",
                                   mu_x[df["sex_id"]],
                                   sigma,
                                   observed=df["brain_std"])
        # men - women
        # mu[0]  0.695,  mu[1]  0.709 Women came at late age with less atrophy, bigger brains
        diff_fm = pm.Deterministic("diff_fm", mu_x[0] - mu_x[1])
        mXB_trace = pm.sample(1000)
    print(az.summary(mXB_trace))
    az.plot_trace(mXB_trace, var_names=["mu_x", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_sex_brain-hn.png'))
    az.plot_forest(mXB_trace,
                   combined=True,
                   model_names=["X~B"],
                   var_names=["mu_x"],
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_sex_brain-hn.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(mXB_trace, 100, mXB)
    data_ppc = az.from_pymc3(trace=mXB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_xXB-hn.png'))

    ################################################################
    ################## AGE -> BRAIN ################################
    #################################################################
    print('Calling to PyMC3 Model Age - > Brain...\n')
    with pm.Model() as m_AB:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaA = pm.Normal("betaA", 0, 1)  #0.5
        #sigma = pm.Exponential("sigma", 1)
        sigma = pm.HalfNormal("sigma", sd=1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AB_trace = pm.sample(1000)
    print(az.summary(m_AB_trace, var_names=["alpha", "betaA", "sigma"]))
    az.plot_trace(m_AB_trace, var_names=["alpha", "betaA", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_brain.png'))
    az.plot_forest([
        m_AB_trace,
    ],
                   model_names=["A~B"],
                   var_names=["betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_AtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_AB_trace, 100, m_AB)
    data_ppc = az.from_pymc3(trace=m_AB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_AB-hn.png'))
    ################################################################
    ################## SEX+AGE -> BRAIN #######################
    #################################################################
    print('Calling to PyMC3 Model Age + Sex - > Brain...\n')
    sexco = pd.Categorical(df.loc[:, "sexo"].astype(int))
    with pm.Model() as m_XAB:
        alphax = pm.Normal("alphax", 0, 1, shape=2)
        betaA = pm.Normal("betaA", 0, 1)
        mu = alphax[sexco] + betaA * df["age_std"]
        sigma = pm.Exponential("sigma", 1)
        #mu = pm.Deterministic("mu", alpha + betaA * df["age_std"] + betaB * df["brain_std"])
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_XAB_trace = pm.sample()
    print(az.summary(m_XAB_trace, var_names=["alphax", "betaA", "sigma"]))
    az.plot_trace(m_XAB_trace, var_names=["alphax", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_ageandsex_brain.png'))
    az.plot_forest([
        m_XAB_trace,
        mXB_trace,
        m_AB_trace,
    ],
                   model_names=["XA~B", "X~B", "A~B"],
                   var_names=["alphax", "mu_x", "betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mXAtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_XAB_trace, 100, m_XAB)
    data_ppc = az.from_pymc3(trace=m_XAB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_XAB-hn.png'))

    print('Calling to PyMC3 Model Brain - > Memory...\n')
    with pm.Model() as m_BC:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaB = pm.Normal("betaB", 0, 1)  #0.5
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BC_trace = pm.sample()
    az.plot_trace(m_BC_trace, var_names=["alpha", "betaB"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_brain_cog.png'))
    print(az.summary(m_BC_trace, var_names=["alpha", "betaB", "sigma"]))
    # Scatter plot x = Brain atrophy Y= Memory test
    mu_mean = m_BC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('brain_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    plt.plot(df.brain_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_B2M.png'))
    print('Saved Figure scatter_hpd_B2M.png \n')
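    # (mu_hpd above is computed but never plotted; a hedged completion would
    # shade the HPD band on the scatter, e.g.
    #     az.plot_hpd(df.brain_std, m_BC_trace['mu'])
    # mirroring the az.plot_hpd usage elsewhere in these examples)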

    print('Calling to PyMC3 Model School - > Memory...\n')
    # School -> Memory method2  m5_9
    with pm.Model() as mSM2:
        #sigma = pm.Uniform("sigma", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Normal("mu", 0.0, 0.5, shape=df["school_id"].max() + 1)
        memory = pm.Normal("memory",
                           mu[df["school_id"]],
                           sigma,
                           observed=df["cog_std"])
        mSM2_trace = pm.sample()
    print(az.summary(mSM2_trace))
    az.plot_trace(mSM2_trace, var_names=["mu", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace2_school_memory.png'))
    az.plot_forest(mSM2_trace, combined=True, var_names=["mu"], hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest2_school_memory.png'))
    pdb.set_trace()

    print('Calling to PyMC3 Model Age - > Memory...\n')
    with pm.Model() as m_AC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AC_trace = pm.sample()
    az.plot_trace(m_AC_trace, var_names=["alpha", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_cog.png'))
    print(az.summary(m_AC_trace, var_names=["alpha", "betaA", "sigma"]))
    # Scatter A2M
    mu_mean = m_AC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('age_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    plt.plot(df.age_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_A2M.png'))
    print('Saved Figure scatter_hpd_A2M.png \n')

    print('Calling to PyMC3 Model Age + Brain - > Memory...\n')
    with pm.Model() as m_BAC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        betaB = pm.Normal("betaB", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic(
            "mu", alpha + betaA * df["age_std"] + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BAC_trace = pm.sample()
    print(
        az.summary(m_BAC_trace, var_names=["alpha", "betaB", "betaA",
                                           "sigma"]))
    az.plot_forest([
        m_BAC_trace,
        m_AC_trace,
        m_BC_trace,
    ],
                   model_names=["BA~C", "A~C", "B~C"],
                   var_names=["betaA", "betaB"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mBAC_AB2M.png'))
Example no. 7
az.plot_kde(data['EQ'].values, rug=True)
plt.yticks([0], alpha=0);

with pm.Model() as model_g:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta_tilde = pm.Normal('theta_tilde', mu=0, sd=1)
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    y = pm.Normal('y', mu=theta, sd=tau, observed=data['EQ'].values)
    trace_g = pm.sample(1000, chains=2)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace_g)

    pm_data = az.from_pymc3(
        trace=trace_g,
        prior=prior,
        posterior_predictive=posterior_predictive,
    )
pm_data

Example no. 8
emcee_data = az.from_emcee(
    sampler,
    var_names=[
        "mean",
        "log_sigma1",
        "log_rho1",
        "log_tau",
        "log_sigma2",
        "log_rho2",
        "log_jitter",
    ],
)

with model:
    pm_data = az.from_pymc3(trace)

numpyro_data = az.from_numpyro(mcmc)

bins = np.linspace(1.5, 2.75, 25)
plt.hist(
    np.exp(np.asarray((emcee_data.posterior["log_rho1"].T)).flatten()),
    bins,
    histtype="step",
    density=True,
    label="emcee",
)
plt.hist(
    np.exp(np.asarray((pm_data.posterior["log_rho1"].T)).flatten()),
    bins,
    histtype="step",
    density=True,
    label="PyMC3",
)
Example no. 9
y_ar.append(generate_ar_sample(tdaynew, omega, tt, use_ar=True))

a0_pred = np.array(y_ar)
    
#######
# Save the samples
#######

# Convert the data to arviz structure 
# Save the predictions
dims = ('chain','draw','time')
ds = az.from_pymc3_predictions({'a0': a0_pred},
                               coords={'time': predtime, 'chain': np.array([1])},
                               dims={'a0': dims})

# Save the posterior
ds2 = az.from_pymc3(trace=trace)

# Update the observed data because it comes out as a theano.tensor in the way
# our particular model is specified
ds2.observed_data['X_obs'] = xr.DataArray(X, dims=('time',), coords={'time':timein})

# This merges the data sets
ds2.extend(ds)

# Save 
ds2.to_netcdf(outputnc)

print(ds2)
print('Done')
print(72*'#')
Example no. 10
# the standard eight schools data (reconstructed; the head of the dict was cut
# off in this snippet)
eight_school_data = {
    'J': 8,
    'y': np.array([28., 8., -3., 7., -1., 1., 18., 12.]),
    'sigma': np.array([15., 10., 16., 11., 9., 11., 10., 18.]),
}
draws = 500   # assumed values; the original definitions were cut off
chains = 2

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta_tilde = pm.Normal('theta_tilde', mu=0, sd=1, shape=eight_school_data['J'])
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    pm.Normal('obs', mu=theta, sd=eight_school_data['sigma'], observed=eight_school_data['y'])

    trace = pm.sample(draws, chains=chains)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace)

    pm_data = az.from_pymc3(
            trace=trace,
            prior=prior,
            posterior_predictive=posterior_predictive,
            coords={'school': np.arange(eight_school_data['J'])},
            dims={'theta': ['school'], 'theta_tilde': ['school']},
        )
#pm_data

#%%
az.plot_posterior(pm_data)
plt.show()

#%%
data = az.load_arviz_data('centered_eight')
az.plot_posterior(data, coords={"school": ["Choate", "Deerfield"]})
plt.show()
Example no. 11
with pm.Model() as model_g:
    # priors for μ and σ were cut from this snippet; weakly informative
    # choices (e.g. μ ~ Normal(0, 10), σ ~ HalfNormal(10)) would fit here
    y = pm.Normal('y', mu=μ, sd=σ, observed=data)
    trace_g = pm.sample(1000)

az.plot_trace(trace_g)

# %%
az.plot_joint(trace_g, kind='kde', fill_last=False)

# %%
az.summary(trace_g)

# %%
y_pred_g = pm.sample_posterior_predictive(trace_g, 100, model_g)

# %%
data_ppc = az.from_pymc3(trace=trace_g, posterior_predictive=y_pred_g)
ax = az.plot_ppc(data_ppc, figsize=(12,6), mean=False)
ax[0].legend(fontsize=15)

# %%
np.mean(stats.t(loc=0, scale=1, df=100).rvs(100))

# %%
plt.figure(figsize=(10, 6))
x_values = np.linspace(-10, 10, 500)
for df in [1, 2, 30]:
    distri = stats.t(df)
    x_pdf = distri.pdf(x_values)
    plt.plot(x_values, x_pdf, label=fr'$\nu = {df}$', lw=3)

x_pdf = stats.norm.pdf(x_values)
ax.set_ylabel("kilocal per g (std)")

# %%
from theano import shared

shared_N = shared(dcc["N"].values)

with pm.Model() as m5_5:
    sigma = pm.Exponential("sigma", 1)
    bN = pm.Normal("bN", 0, 0.5)
    a = pm.Normal("a", 0, 0.2)
    mu = pm.Deterministic("mu", a + bN * shared_N)

    K = pm.Normal("K", mu, sigma, observed=dcc["K"])

    m5_5_trace = pm.sample()

m5_5_data = az.from_pymc3(m5_5_trace)

# %%
az.summary(m5_5_trace, var_names=["a", "bN", "sigma"])

# %%
xseq = np.linspace(dcc["N"].min() - 0.15, dcc["N"].max() + 0.15, 30)

shared_N.set_value(xseq)

with m5_5:
    m5_5_posterior_predictive = pm.sample_posterior_predictive(
        m5_5_trace, var_names=["mu"], samples=4000)

mu_mean = m5_5_posterior_predictive["mu"].mean(axis=0)
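
# Hedged follow-up (not in the original snippet): the counterfactual mean and
# its uncertainty over xseq would typically be plotted as
#     plt.plot(xseq, mu_mean)
#     az.plot_hpd(xseq, m5_5_posterior_predictive["mu"])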
Example no. 13
         label="Non-robust regression",
         alpha=0.5)
# plot the data
plt.plot(x, y, 'C0o')
# get the mean of the intercept from the posterior
alpha_m = trace["alpha"].mean()
# get the mean of the coefficient from the posterior
beta_m = trace["beta"].mean()
# plot the robust linear regression
plt.plot(x, alpha_m + beta_m * x, c="k", label="Robust linear regression")
# plot the variety of predicted results
az.plot_hpd(x, ppc["obs"])
# set the last details of the graph
plt.xlabel("x")
plt.ylabel("y", rotation=0)
#plt.legend(loc=2)
plt.tight_layout()
plt.show()

# ----------------- analyse the posterior -------------------- #

with model_t:
    az.plot_trace(trace, var_names=["alpha", "beta", "sigma", "vu"])
    # get the summary
    log.info("the trace summary is: %s", az.summary(trace))
    # let's also run a posterior predictive check
    ppc = pm.sample_posterior_predictive(trace, samples=2000)
    data_ppc = az.from_pymc3(trace=trace, posterior_predictive=ppc)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=True)
    plt.xlim(0, 12)
Example no. 14
# reparameterized
# the book says this should have issues with parameter non-identifiability,
# but it seems fine
with pm.Model() as model_mg:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=cs_exp.mean(), sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)
    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp)
    trace_mg = pm.sample(random_seed=123)

varnames = ['means', 'p']
az.plot_trace(trace_mg, varnames)
az.summary(trace_mg, varnames)

ppc_mg = pm.sample_posterior_predictive(trace_mg, 2000, model=model_mg)
data_ppc = az.from_pymc3(trace=trace_mg, posterior_predictive=ppc_mg)
ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)

clusters = 2
with pm.Model() as model_mgp:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=np.array([.9, 1]) * cs_exp.mean(),
                      sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)

    # Potential adds a constraint to the model:
    # assign -inf log-probability whenever the means are out of order,
    # i.e. enforce means[0] < means[1] to break label switching
    order_means = pm.Potential('order_means',
                               tt.switch(means[1]-means[0] < 0,
                                         -np.inf, 0))
    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp)
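    # Hedged note (not in the original): PyMC3 can express the same ordering
    # constraint with a transform instead of a Potential, e.g.
    #     means = pm.Normal('means', mu=np.array([.9, 1]) * cs_exp.mean(),
    #                       sd=10, shape=clusters,
    #                       transform=pm.distributions.transforms.ordered,
    #                       testval=np.array([.9, 1]) * cs_exp.mean())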
Example no. 15
    def test_sample_posterior_predictive_w(self):
        data0 = np.random.normal(0, 1, size=50)
        warning_msg = "The number of samples is too small to check convergence reliably"

        with pm.Model() as model_0:
            mu = pm.Normal("mu", mu=0, sigma=1)
            y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
            with pytest.warns(UserWarning, match=warning_msg):
                trace_0 = pm.sample(10,
                                    tune=0,
                                    chains=2,
                                    return_inferencedata=False)
            idata_0 = az.from_pymc3(trace_0)

        with pm.Model() as model_1:
            mu = pm.Normal("mu", mu=0, sigma=1, shape=len(data0))
            y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
            with pytest.warns(UserWarning, match=warning_msg):
                trace_1 = pm.sample(10,
                                    tune=0,
                                    chains=2,
                                    return_inferencedata=False)
            idata_1 = az.from_pymc3(trace_1)

        with pm.Model() as model_2:
            # Model with no observed RVs.
            mu = pm.Normal("mu", mu=0, sigma=1)
            with pytest.warns(UserWarning, match=warning_msg):
                trace_2 = pm.sample(10, tune=0, return_inferencedata=False)

        traces = [trace_0, trace_1]
        idatas = [idata_0, idata_1]
        models = [model_0, model_1]

        ppc = pm.sample_posterior_predictive_w(traces, 100, models)
        assert ppc["y"].shape == (100, 50)

        ppc = pm.sample_posterior_predictive_w(idatas, 100, models)
        assert ppc["y"].shape == (100, 50)

        with model_0:
            ppc = pm.sample_posterior_predictive_w([idata_0.posterior], None)
            assert ppc["y"].shape == (20, 50)

        with pytest.raises(
                ValueError,
                match="The number of traces and weights should be the same"):
            pm.sample_posterior_predictive_w([idata_0.posterior],
                                             100,
                                             models,
                                             weights=[0.5, 0.5])

        with pytest.raises(
                ValueError,
                match="The number of models and weights should be the same"):
            pm.sample_posterior_predictive_w([idata_0.posterior], 100, models)

        with pytest.raises(
                ValueError,
                match=
                "The number of observed RVs should be the same for all models"
        ):
            pm.sample_posterior_predictive_w([trace_0, trace_2], 100,
                                             [model_0, model_2])
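
        # Hedged usage sketch (not part of the test): explicit weights, e.g.
        # normalized ones from az.compare, must match the number of traces
        # and models:
        #     pm.sample_posterior_predictive_w(traces, 100, models,
        #                                      weights=[0.7, 0.3])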
Example no. 16
                # Expand dims twice to get extra axes for chain and sample
                point_extra_dim = {
                    k: np.expand_dims(np.expand_dims(v, axis=0), axis=0)
                    for k, v in pm.find_MAP().items()
                }
                self.trace = az.from_dict(point_extra_dim)
            elif fit_method == 'mcmc':
                self.trace = pm.sample(tune=mcmc_iter,
                                       draws=mcmc_iter,
                                       return_inferencedata=True,
                                       chains=chains,
                                       cores=cores)
            elif fit_method == 'vi':
                self.approx = pm.fit(vi_iter)
                self.trace = self.approx.sample()
                self.trace = az.from_pymc3(self.trace)
            else:
                raise NotImplementedError(
                    f'Fit method {fit_method} not supported')

    def predict(self, X, predict_kwargs={'diag': True, 'pred_noise': True}):
        '''
        Generate GP predictions at new spatiotemporal coordinates.
        
        Arguments
        ---------
        X : sequence of Numpy arrays
            The first array will usually be a Sx2 array of spatial coordinates while the second array will be
            a Tx1 array of temporal coordinates.
        predict_kwargs : dict
            Additional keyword arguments passed to PyMC3 gp object. "diag" controls whether or not
Example no. 17
np.random.seed(909)
# simulate height
height = np.random.normal(10, 2, size=N)
# leg as proportion of height
leg_prop = np.random.uniform(low=0.4, high=0.5, size=N)
leg_left = leg_prop * height + np.random.normal(scale=0.02, size=N)
leg_right = leg_prop * height + np.random.normal(scale=0.02, size=N)
d = pd.DataFrame(dict(height=height, leg_left=leg_left, leg_right=leg_right))
d.head(2)
pd.plotting.scatter_matrix(d)
with pm.Model() as m1:
    br = pm.Normal('br', 2, 10)
    bl = pm.Normal('bl', 2, 10)
    sigma = pm.Exponential('sigma', 1)
    a = pm.Normal('a', 10, 100)
    mu = a + bl * d.leg_left + br * d.leg_right
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height)
    trc1 = pm.sample()
pm.summary(trc1, alpha=0.11)
ar.plot_forest(trc1,
               var_names=['a', 'bl', 'br'],
               combined=True,
               figsize=(5, 2))
pl.axvline(color='k', ls=':')

with m1:
    prior = pm.sample_prior_predictive()
ppc = pm.sample_posterior_predictive(trc1, samples=500, model=m1)
ar1 = ar.from_pymc3(trace=trc1, prior=prior, posterior_predictive=ppc)
ar.plot_pair(trc1, var_names=['a', 'br', 'bl'], divergences=True, kind='kde')
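
# Hedged note: with nearly collinear predictors (leg_left, leg_right) only the
# sum bl + br is well identified; a quick check is the posterior of that sum:
#     (trc1['bl'] + trc1['br']).mean()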
Example no. 18
    def fit(self, p1, p2, p1_mode, p2_mode, divisor = 20, deviation = 0.2):
        """Fits the model and plots the prior predictive distribution.

        Args:
            p1 (tuple): Tuple (p, n_components) where p is the period (int or
                float) and n_components is the number of Fourier components
                (must be an integer).
            p2 (tuple): Tuple (p, n_components) where p is the period (int or
                float) and n_components is the number of Fourier components
                (must be an integer).
            p1_mode (str): Whether the first seasonal component is multiplicative
                or additive. Anything other than "multiplicative" defaults to additive.
            p2_mode (str): Whether the second seasonal component is multiplicative
                or additive. Anything other than "multiplicative" defaults to additive.
            divisor (int, optional): A scaling parameter for adjusting the standard deviation of the distribution of p. Defaults to 20.
            deviation (float, optional): Parameter specifying the standard deviation of the beta for the seasonal component. Defaults to 0.2.

        Example:
            Pc.fit(p1 = (7, 2), p2 = (365.25, 2), p1_mode = "additive", p2_mode = "multiplicative", divisor = 15, deviation = 0.3)
        """        
        ## NB: we assume that input is in days. 

        ## common across week & month (I guess)
        ## NB: deviation might as well just go in on each place then.

        ## p1
        p1_mu, p1_components = p1
        p1_sd = p1_mu/divisor

        ## normalize week.
        p1_mu = (p1_mu - self.train[self.time_codes].min()) / (self.train[self.time_codes].max() - self.train[self.time_codes].min())
        p1_sd = (p1_sd - self.train[self.time_codes].min()) / (self.train[self.time_codes].max() - self.train[self.time_codes].min())
        beta_p1_sd = deviation

        ## p2
        p2_mu, p2_components = p2
        p2_sd = p2_mu/divisor

        ## normalize month.
        p2_mu = (p2_mu -  self.train[self.time_codes].min()) / (self.train[self.time_codes].max() - self.train[self.time_codes].min())
        p2_sd = (p2_sd - self.train[self.time_codes].min()) / (self.train[self.time_codes].max() - self.train[self.time_codes].min())
        beta_p2_sd = deviation
        
        
        with pm.Model(coords=self.coords) as m0: 
            # shared 
            t1_shared = pm.Data('t1_shared', self.t1_train)
            t2_shared = pm.Data('t2_shared', self.t2_train)
            t3_shared = pm.Data('t3_shared', np.array(self.t3_train))
            idx_shared = pm.Data('idx_shared', self.idx_train)

            # prepare fourier week
            #seasonal_component(name, name_beta, mu, sd, beta_sd, n_components, shape, time_scaled)
            beta_p1_waves, x_p1_waves, p1_flat = self.seasonal_component(
                name = "p1",
                name_beta = "beta_p1_waves",
                mu = p1_mu,
                sd = p1_sd,
                beta_sd = beta_p1_sd,
                n_components = p1_components,
                shape = self.n_train,
                t1 = t1_shared,
                t2 = t2_shared,
                t3 = t3_shared,
                mode = p1_mode)

            beta_p2_waves, x_p2_waves, p2_flat = self.seasonal_component(
                name = "p2",
                name_beta = "beta_p2_waves",
                mu = p2_mu,
                sd = p2_sd,
                beta_sd = beta_p2_sd,
                n_components = p2_components,
                shape = self.n_train,
                t1 = t1_shared,
                t2 = t2_shared,
                t3 = t3_shared,
                mode = p2_mode)

            # other priors
            beta_line = pm.Normal('beta_line', mu = 0, sd = 0.3, shape = self.n_train)
            alpha = pm.Normal('alpha', mu = 0.5, sd = 0.3, shape = self.n_train)

            mu = alpha[idx_shared] + beta_line[idx_shared] * t1_shared + p1_flat + p2_flat

            # sigma 
            sigma = pm.Exponential('sigma', 1)

            # likelihood 
            y_pred = pm.Normal(
                'y_pred', 
                mu = mu,
                sd = sigma,
                observed = self.y_train,
                dims = "idx")
    
            self.model = m0
        
        ##### Part 6: Sampling ######

        ## sample prior
        with self.model:
            prior_pred = pm.sample_prior_predictive(100) # like setting this low. 
            m0_idata = az.from_pymc3(prior=prior_pred)

        az.plot_ppc(
            m0_idata, 
            group="prior",
            figsize = (18, 10))
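
        # Hedged next step (not shown in the snippet): after the prior
        # predictive check, one would sample the posterior in the same context:
        #     with self.model:
        #         m0_trace = pm.sample(draws=2000, tune=2000)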