Example #1
def get_waic_and_loo(fit):
    """Compute WAIC and LOO from a fit instance"""
    idata = az.from_pystan(fit, log_likelihood="llx")
    result = {}
    result.update(dict(az.loo(idata, scale='deviance')))
    result.update(dict(az.waic(idata, scale='deviance')))
    return result
Example #2
def compare(m: Mapping[str, Trace]) -> pd.DataFrame:
    waics = []
    models = []
    for (expr, trace) in m.items():
        waics.append(az.waic(trace))
        models.append(str(expr))
    measures = pd.DataFrame(
        {"waic": [w.waic for w in waics], "p_waic": [w.p_waic for w in waics],}
    )
    measures.index = models
    measures["dwaic"] = np.max(measures.waic) - measures.waic
    measures["weights"] = compute_aikake_weights(measures.waic.values)
    measures.sort_values(by="dwaic", inplace=True)
    return measures
Example #3
def compare_models(traces, exprs):
    waics = []
    models = []
    for (expr, trace) in zip(exprs, traces):
        waics.append(az.waic(trace))
        models.append(str(expr))
    measures = pd.DataFrame({
        "model": models,
        "waic": [w.waic for w in waics],
        "p_waic": [w.p_waic for w in waics],
    })
    measures["dwaic"] = np.max(measures.waic) - measures.waic
    measures["aikake_weights"] = compute_aikake_weights(measures.waic.values)
    return measures
Example #4
def get_fit_quality(fit) -> dict:
    """Compute Widely-Available Information Criterion (WAIC) and
    Leave One Out (LOO) from a fit instance using Arviz.

    Args:
        fit: A PyStan StanFit4Model instance (i.e. a PyStan fit).

    Returns:
        dict: WAIC and LOO statistics (and se's) for this fit.
    """
    result = {}
    try:
        idata = az.from_pystan(fit, log_likelihood="llx")
    except KeyError as e:
        warn("'%s' not found; waic and loo will not be computed" % str(e),
             stacklevel=2)
        result.update({'waic': 0, 'loo': 0})
    else:
        result.update(dict(az.loo(idata, scale='deviance')))
        result.update(dict(az.waic(idata, scale='deviance')))
    result.update({'lp__rhat': get_rhat(fit)})
    return result
Example #5
def extract_ic(model_data: TaskModel,
               ic: str = 'both',
               ncore: int = 2) \
        -> Dict:
    """Extract model comparison estimates.

    Parameters
    ----------
    model_data
        hBayesDM output objects from running model functions.
    ic
        Information criterion. 'looic', 'waic', or 'both'. Defaults to 'both'.
    ncore
        Number of cores to use when computing LOOIC. Defaults to 2.

    Returns
    -------
    Dict
        Leave-One-Out and/or Watanabe-Akaike information criterion estimates.
    """
    ic_options = ('looic', 'waic', 'both')
    if ic not in ic_options:
        raise RuntimeError('Information Criterion (ic) must be one of ' +
                           repr(ic_options))

    dat = az.from_pystan(model_data.fit, log_likelihood='log_lik')

    ret = {}

    if ic in ['looic', 'both']:
        ret['looic'] = az.loo(dat)['loo']

    if ic in ['waic', 'both']:
        ret['waic'] = az.waic(dat)['waic']

    return ret
Example #6
    trace = pm.sample()

summary = az.summary(trace, var_names=['prob_response'])
ary_prob_response = trace['prob_response']

print('\nP(correct|item,stage)\n')
print('{0: >10s}'.format(' '), end='')
for i in range(NStg):
    print("{0:>10s}".format('Stage-{}'.format(i)), end='')
print(' ')

for j in range(M):
    print('Item-{0: <5d}'.format(j + 1), end='')
    for s in range(NStg):
        print('{0: >10.5f}'.format(ary_prob_response[:, s, j].mean()), end='')
    print(' ')

ary_p_stages = trace['p_stages']
ary_p_stages = ary_p_stages.T
print("\nMean stages and their SD's\n")
print('{0: >11s}{1:>10s}{2:>10s}'.format(' ', 'Mean', 'SD'))
for i in range(len(ary_p_stages)):
    print('person-{0: <4d}{1:>10.3f}{2:>10.3f}'.format(i + 1,
                                                       ary_p_stages[i].mean(),
                                                       ary_p_stages[i].std()))
v_waic = az.waic(trace)

print('\nWAIC = {0:.3f}  for the number of stages = {1}'.format(
    v_waic['waic'], NStg))
print('\nWAIC statistics...\n', v_waic)
Example #7
    r2 = ssreg / sstot

    plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}')

plt.legend(loc=2)
plt.xlabel('x')
plt.ylabel('y', rotation=0)
plt.savefig('B11197_05_05.png', dpi=300)
plt.plot([10, 7], [9, 7], 'ks')
plt.savefig('B11197_05_06.png', dpi=300)

# ## Computing information criteria with PyMC3

# In[10]:

waic_l = az.waic(trace_l)
waic_l

# In[11]:

cmp_df = az.compare({
    'model_l': trace_l,
    'model_p': trace_p
},
                    method='BB-pseudo-BMA')
cmp_df

# In[12]:

az.plot_compare(cmp_df)
plt.savefig('B11197_05_08.png', dpi=300)
Example #8
    beta = pm.Normal("beta", mu=0, sd=10)
    sigma = pm.HalfCauchy("sigma", beta=10, testval=sigma_start)
    mu = alpha + beta * mass_shared
    kcal = pm.Normal("kcal", mu=mu, sd=sigma, observed=d["kcal.per.g"])
    trace_m6_13 = pm.sample(1000, tune=1000)

with pm.Model() as m6_14:
    alpha = pm.Normal("alpha", mu=0, sd=10, testval=a_start)
    beta = pm.Normal("beta", mu=0, sd=10, shape=2)
    sigma = pm.HalfCauchy("sigma", beta=10, testval=sigma_start)
    mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared
    kcal = pm.Normal("kcal", mu=mu, sd=sigma, observed=d["kcal.per.g"])
    trace_m6_14 = pm.sample(5000, tune=15000)

# %%
az.waic(trace_m6_14, m6_14)

# %%
compare_df = az.compare(
    {
        "m6_11": trace_m6_11,
        "m6_12": trace_m6_12,
        "m6_13": trace_m6_13,
        "m6_14": trace_m6_14,
    },
    method="pseudo-BMA",
)
compare_df

# %%
az.plot_compare(compare_df)
Example #9
    p = np.poly1d(coeffs)
    yhat = p(x)
    ybar = np.mean(y)
    ssreg = np.sum((yhat - ybar)**2)
    sstot = np.sum((y - ybar)**2)
    r2 = ssreg / sstot

    plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}')

plt.legend(loc=2)
plt.xlabel('x')
plt.ylabel('y', rotation=0)

# %%
waic_1 = az.waic(trace_1)
waic_1
# %%
cmp_df = az.compare({
    'model_1': trace_1,
    'model_p': trace_p
},
                    method='BB-pseudo-BMA')
cmp_df

# %%
az.plot_compare(cmp_df)

# %%
w = .5
y_lp = pm.sample_posterior_predictive_w([trace_1, trace_p],
Example #10
    for d_sim, c in zip([y_l, y_p], ['C1', 'C2']):
        T_sim = func(d_sim, 1)
        p_value = np.mean(T_sim >= T_obs)
        az.plot_kde(T_sim,
                    plot_kwargs={'color': c},
                    label=f'p-value {p_value:.2f}',
                    ax=ax[idx])
    ax[idx].set_title(func.__name__)
    ax[idx].set_yticks([])
    ax[idx].legend()

###########################################
# Computing information criteria with PyMC3
###########################################

waic_l = az.waic(trace_l)
waic_l

cmp_df = az.compare({
    'model_l': trace_l,
    'model_p': trace_p
},
                    method='BB-pseudo-BMA')
cmp_df

########################
# computing bayes factor
########################

coins = 30  # 300
heads = 9  # 90
Example #11
trace_8_1_df = pm.trace_to_dataframe(trace_8_1_4_chains)
grid = (sns.PairGrid(
    trace_8_1_df,
    x_vars=["a", "bR", "bA", "bAR", "sigma"],
    y_vars=["a", "bR", "bA", "bAR", "sigma"],
    diag_sharey=False,
).map_diag(sns.kdeplot).map_upper(plt.scatter, alpha=0.1).map_lower(plot_corr))

# %%
m8_1.logp(
    {varname: trace_8_1[varname].mean()
     for varname in trace_8_1.varnames})

# %%
az.waic(trace_8_1)

# %%
az.plot_trace(trace_8_1)

# %%
y = np.array([-1.0, 1.0])

with pm.Model() as m8_2:
    alpha = pm.Flat("alpha")
    sigma = pm.Bound(pm.Flat, lower=0.0)("sigma")

    y_obs = pm.Normal("y_obs", alpha, sigma, observed=y)

# %%
with m8_2:
    #Likelihood
    y = pm.Normal("y",
                  mu,
                  sigma=sigma,
                  observed=log_electricity,
                  dims="obs_id")

#Fitting
with partial_pooling_2:
    approx = pm.fit(n=50000,
                    method='fullrank_advi',
                    callbacks=[CheckParametersConvergence(tolerance=0.01)])

partial_pooled_loo = az.loo(partial_pooling_trace, partial_pooling_2)
partial_pooled_waic = az.waic(partial_pooling_trace, partial_pooling_2)

with pm.Model(coords=coords_2) as no_pooling_2:
    profile_cluster_idx = pm.Data("profile_cluster_idx",
                                  clusters,
                                  dims="obs_id")
    heat_temp_cluster_idx = pm.Data("heat_temp_cluster_idx",
                                    heat_clusters,
                                    dims="obs_id")
    cool_temp_cluster_idx = pm.Data("cool_temp_cluster_idx",
                                    cool_clusters,
                                    dims="obs_id")
    daypart = pm.Data("daypart", dayparts, dims="obs_id")

    fs_sin_1 = pm.Data("fs_sin_1", daypart_fs_sin_1, dims="obs_id")
    fs_sin_2 = pm.Data("fs_sin_2", daypart_fs_sin_2, dims="obs_id")
Example #13
## Pareto smoothed importance sampling leave-one-out cross validation

[Cross-validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)) (CV) is another method of estimating out-of-sample prediction accuracy. It requires re-fitting a model many times, each time excluding a different portion of the data; the excluded portion is then used to measure how accurately the model predicts it. The estimated accuracy of the model is the average over all runs. The entire dataset is then used to fit the model one more time, and this is the model used for further analysis and/or predictions. Leave-one-out cross-validation (LOO-CV) is the particular case of cross-validation in which the excluded data is a single data point.
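
As a rough illustration of the refit-and-score loop described above (a minimal sketch, not part of the original text), here is a plain NumPy version that leave-one-out cross-validates a polynomial fit; `np.polyfit` is only a stand-in for whatever model is being validated, and `x`, `y`, and `loo_cv_mse` are hypothetical names.

import numpy as np

def loo_cv_mse(x, y, order=1):
    """Leave-one-out CV: re-fit once per held-out point, average the squared errors."""
    errors = []
    for i in range(len(y)):
        keep = np.arange(len(y)) != i                 # exclude a single data point
        coeffs = np.polyfit(x[keep], y[keep], order)  # re-fit on the remaining data
        y_pred = np.polyval(coeffs, x[i])             # predict the held-out point
        errors.append((y[i] - y_pred) ** 2)
    return np.mean(errors)                            # average accuracy over all runs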

As CV can be quite time consuming (especially for Bayesian models), it is useful to know that, in theory, LOO-CV can be approximated without re-fitting the model for every data point. A practical and computationally efficient way to do so relies on a combination of strategies that includes [Pareto smoothed importance sampling](https://arxiv.org/abs/1507.02646). The resulting method is known as PSIS-LOO-CV; while very useful, it has a rather unwieldy name, so we simply call it LOO.


While LOO and WAIC approximate two different quantities, asymptotically they converge to the same numerical value, and in practice they generally agree. The main advantage of LOO is that it is more informative: it provides [useful diagnostics](https://arxiv.org/abs/1507.04544) and other goodies such as effective sample size and Monte Carlo standard error estimates.
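
As a sketch of how those diagnostics can be inspected (assuming `idata` is any InferenceData object with a log_likelihood group; the variable name is a placeholder):

loo_result = az.loo(idata, pointwise=True)  # keep the per-observation results
print(loo_result)                           # elpd_loo, p_loo, their standard errors, and a Pareto k summary
az.plot_khat(loo_result)                    # observations with high Pareto k (> 0.7) are flagged as unreliable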

Using ArviZ, both LOO and WAIC can be computed just by calling a function. Let's try them on an arbitrary pre-loaded model:

# change this to some good example
model0 = az.load_arviz_data('regression1d')
model1 = az.load_arviz_data('regression1d')

az.waic(model0)

az.loo(model0)

As you can see, WAIC and LOO return similar values. ArviZ also comes equipped with the `compare(.)` function, which is more convenient than calling `loo(.)` or `waic(.)` for each model individually.


## The compare function

This function takes a dictionary of names (keys) and models (values) as input and returns a DataFrame ordered (row-wise) from best to worst model.



cmp = az.compare({"m0":model0, "m1":model1,})
cmp
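
As in the earlier examples, the comparison can also be visualized; a minimal follow-up on the `cmp` DataFrame just computed:

az.plot_compare(cmp)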
Example #14
def compare_models(df,
                   models: dict,
                   extra_model_args: list = None,
                   parallel=False,
                   plotose=False,
                   **kwargs):
    """
    kwargs are forwarded to split_train_predict->fit_numpyro
    compare_models(models={'Hier':bayes.Numpyro.model_hier,
                           'Hier+covariance':bayes.Numpyro.model_hier_covar,
                           'Twostep Exponential':bayes.TwoStep.model_twostep,
                           'Twostep Gamma':bayes.TwoStep.model_twostep,
                          },
                   data=[df,df,df_monster,df_monster],
                   extra_args=[{}, {}, {'prior':'Exponential'}, {'prior':'Gamma'}])
    """
    # TODO save all model args in BayesWindow in self
    # Calculate
    extra_model_args = extra_model_args or np.tile({}, len(models))
    if parallel:
        traces = Parallel(n_jobs=min(os.cpu_count(), len(models)))(
            delayed(split_train_predict)(
                df, model, num_chains=1, **kwargs, **extra_model_arg)
            for model, extra_model_arg in zip(models.values(),
                                              extra_model_args))
    else:
        traces = [
            split_train_predict(df, model, y=kwargs['y'], **extra_model_arg)
            for model, extra_model_arg in zip(tqdm(models.values()),
                                              extra_model_args)
        ]

    # save to dict
    traces_dict = {}  # initialize results
    for key, trace in zip(models.keys(), traces):
        traces_dict[key] = trace

    # Plot
    if plotose:
        for trace_name, trace in traces_dict.items():

            # Plot PPC
            az.plot_ppc(
                trace,
                # flatten=[treatment],
                # flatten_pp=data_cols[2],
                mean=False,
                # num_pp_samples=1000,
                # kind='cumulative'
            )
            plt.title(trace_name)
            plt.show()
            r2(trace)
            # Weird that r2=1
            # Waic
            try:
                print('======= WAIC (higher is better): =========')
                print(az.waic(trace, pointwise=True))
                print(az.waic(trace, var_name='y'))
            except TypeError:
                pass

        try:
            for trace_name in traces_dict.keys():
                trace = traces_dict[trace_name]
                # Print diagnostics and effect size
                print(
                    f"n(Divergences) = {trace.sample_stats.diverging.sum(['chain', 'draw']).values}"
                )
                try:
                    slope = trace.posterior['v_mu'].sel({
                        'v_mu_dim_0': 1
                    }).mean(['chain']).values
                except Exception:
                    slope = trace.posterior['b'].mean(['chain']).values
                print(
                    f'Effect size={(slope.mean() / slope.std()).round(2)}  == {trace_name}'
                )
        except Exception:
            pass

    model_compare = az.compare(traces_dict)  # , var_name='y')
    az.plot_compare(model_compare, textsize=12, show=True)
    return model_compare