def get_waic_and_loo(fit):
    """Compute WAIC and LOO from a fit instance"""
    idata = az.from_pystan(fit, log_likelihood="llx")
    result = {}
    result.update(dict(az.loo(idata, scale='deviance')))
    result.update(dict(az.waic(idata, scale='deviance')))
    return result
def compare(m: Mapping[str, Trace]) -> pd.DataFrame:
    waics = []
    models = []
    for (expr, trace) in m.items():
        waics.append(az.waic(trace))
        models.append(str(expr))
    measures = pd.DataFrame(
        {"waic": [w.waic for w in waics],
         "p_waic": [w.p_waic for w in waics]}
    )
    measures.index = models
    measures["dwaic"] = np.max(measures.waic) - measures.waic
    measures["weights"] = compute_aikake_weights(measures.waic.values)
    measures.sort_values(by="dwaic", inplace=True)
    return measures
def compare_models(traces, exprs):
    waics = []
    models = []
    for (expr, trace) in zip(exprs, traces):
        waics.append(az.waic(trace))
        models.append(str(expr))
    measures = pd.DataFrame({
        "model": models,
        "waic": [w.waic for w in waics],
        "p_waic": [w.p_waic for w in waics],
    })
    measures["dwaic"] = np.max(measures.waic) - measures.waic
    measures["aikake_weights"] = compute_aikake_weights(measures.waic.values)
    return measures
def get_fit_quality(fit) -> dict:
    """Compute the Widely Applicable Information Criterion (WAIC) and the
    Leave-One-Out (LOO) cross-validation estimate from a fit instance using ArviZ.

    Args:
        fit: A PyStan4model instance (i.e. a PyStan fit).

    Returns:
        dict: WAIC and LOO statistics (and their standard errors) for this fit.
    """
    result = {}
    try:
        idata = az.from_pystan(fit, log_likelihood="llx")
    except KeyError as e:
        warn("'%s' not found; waic and loo will not be computed" % str(e),
             stacklevel=2)
        result.update({'waic': 0, 'loo': 0})
    else:
        result.update(dict(az.loo(idata, scale='deviance')))
        result.update(dict(az.waic(idata, scale='deviance')))
    result.update({'lp__rhat': get_rhat(fit)})
    return result
def extract_ic(model_data: TaskModel,
               ic: str = 'both',
               ncore: int = 2) -> Dict:
    """Extract model comparison estimates.

    Parameters
    ----------
    model_data
        hBayesDM output objects from running model functions.
    ic
        Information criterion. 'looic', 'waic', or 'both'. Defaults to 'both'.
    ncore
        Number of cores to use when computing LOOIC. Defaults to 2.

    Returns
    -------
    Dict
        Leave-One-Out and/or Watanabe-Akaike information criterion estimates.
    """
    ic_options = ('looic', 'waic', 'both')
    if ic not in ic_options:
        raise RuntimeError('Information Criterion (ic) must be one of ' +
                           repr(ic_options))

    dat = az.from_pystan(model_data.fit, log_likelihood='log_lik')

    ret = {}
    if ic in ['looic', 'both']:
        ret['looic'] = az.loo(dat)['loo']
    if ic in ['waic', 'both']:
        ret['waic'] = az.waic(dat)['waic']

    return ret
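For context, a brief usage sketch follows. It assumes this function sits alongside hBayesDM's model-fitting functions (the chosen task model, iteration counts, and `data='example'` are illustrative assumptions, not taken from the snippet above):

# hypothetical usage sketch: fit an hBayesDM task model, then extract LOOIC/WAIC
from hbayesdm.models import gng_m1   # assumption: any hBayesDM task model works the same way

output = gng_m1(data='example', niter=2000, nwarmup=1000, nchain=4, ncore=4)
ics = extract_ic(output, ic='both')
print(ics['looic'], ics['waic'])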
trace = pm.sample()
summary = az.summary(trace, var_names=['prob_response'])
ary_prob_response = trace['prob_response']

print('\nP(correct|item,stage)\n')
print('{0: >10s}'.format(' '), end='')
for i in range(NStg):
    print("{0:>10s}".format('Stage-{}'.format(i)), end='')
print(' ')
for j in range(M):
    print('Item-{0: <5d}'.format(j + 1), end='')
    for s in range(NStg):
        print('{0: >10.5f}'.format(ary_prob_response[:, s, j].mean()), end='')
    print(' ')

ary_p_stages = trace['p_stages']
ary_p_stages = ary_p_stages.T
print("\nMean stages and their SD's\n")
print('{0: >11s}{1:>10s}{2:>10s}'.format(' ', 'Mean', 'SD'))
for i in range(len(ary_p_stages)):
    print('person-{0: <4d}{1:>10.3f}{2:>10.3f}'.format(
        i + 1, ary_p_stages[i].mean(), ary_p_stages[i].std()))

v_waic = az.waic(trace)
print('\nWAIC = {0:.3f} for the number of stages = {1}'.format(
    v_waic['waic'], NStg))
print('\nWAIC statistics...\n', v_waic)
r2 = ssreg / sstot

plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}')
plt.legend(loc=2)
plt.xlabel('x')
plt.ylabel('y', rotation=0)
plt.savefig('B11197_05_05.png', dpi=300)

plt.plot([10, 7], [9, 7], 'ks')
plt.savefig('B11197_05_06.png', dpi=300)

# ## Computing information criteria with PyMC3

# In[10]:

waic_l = az.waic(trace_l)
waic_l

# In[11]:

cmp_df = az.compare({'model_l': trace_l, 'model_p': trace_p},
                    method='BB-pseudo-BMA')
cmp_df

# In[12]:

az.plot_compare(cmp_df)
plt.savefig('B11197_05_08.png', dpi=300)
beta = pm.Normal("beta", mu=0, sd=10) sigma = pm.HalfCauchy("sigma", beta=10, testval=sigma_start) mu = alpha + beta * mass_shared kcal = pm.Normal("kcal", mu=mu, sd=sigma, observed=d["kcal.per.g"]) trace_m6_13 = pm.sample(1000, tune=1000) with pm.Model() as m6_14: alpha = pm.Normal("alpha", mu=0, sd=10, testval=a_start) beta = pm.Normal("beta", mu=0, sd=10, shape=2) sigma = pm.HalfCauchy("sigma", beta=10, testval=sigma_start) mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared kcal = pm.Normal("kcal", mu=mu, sd=sigma, observed=d["kcal.per.g"]) trace_m6_14 = pm.sample(5000, tune=15000) # %% az.waic(trace_m6_14, m6_14) # %% compare_df = az.compare( { "m6_11": trace_m6_11, "m6_12": trace_m6_12, "m6_13": trace_m6_13, "m6_14": trace_m6_14, }, method="pseudo-BMA", ) compare_df # %% az.plot_compare(compare_df)
p = np.poly1d(coeffs)
yhat = p(x)
ybar = np.mean(y)
ssreg = np.sum((yhat - ybar)**2)
sstot = np.sum((y - ybar)**2)
r2 = ssreg / sstot

plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}')
plt.legend(loc=2)
plt.xlabel('x')
plt.ylabel('y', rotation=0)

# %%
waic_1 = az.waic(trace_1)
waic_1

# %%
cmp_df = az.compare({'model_1': trace_1, 'model_p': trace_p},
                    method='BB-pseudo-BMA')
cmp_df

# %%
az.plot_compare(cmp_df)

# %%
w = .5
y_lp = pm.sample_posterior_predictive_w([trace_1, trace_p],
for d_sim, c in zip([y_l, y_p], ['C1', 'C2']):
    T_sim = func(d_sim, 1)
    p_value = np.mean(T_sim >= T_obs)
    az.plot_kde(T_sim, plot_kwargs={'color': c},
                label=f'p-value {p_value:.2f}', ax=ax[idx])
ax[idx].set_title(func.__name__)
ax[idx].set_yticks([])
ax[idx].legend()

###########################################
# Computing information criteria with PyMC3
###########################################

waic_l = az.waic(trace_l)
waic_l

cmp_df = az.compare({'model_l': trace_l, 'model_p': trace_p},
                    method='BB-pseudo-BMA')
cmp_df

########################
# computing bayes factor
########################

coins = 30  # 300
heads = 9  # 90
trace_8_1_df = pm.trace_to_dataframe(trace_8_1_4_chains)
grid = (sns.PairGrid(
    trace_8_1_df,
    x_vars=["a", "bR", "bA", "bAR", "sigma"],
    y_vars=["a", "bR", "bA", "bAR", "sigma"],
    diag_sharey=False,
).map_diag(sns.kdeplot).map_upper(plt.scatter, alpha=0.1).map_lower(plot_corr))

# %%
m8_1.logp(
    {varname: trace_8_1[varname].mean() for varname in trace_8_1.varnames})

# %%
az.waic(trace_8_1)

# %%
az.plot_trace(trace_8_1)

# %%
y = np.array([-1.0, 1.0])

with pm.Model() as m8_2:
    alpha = pm.Flat("alpha")
    sigma = pm.Bound(pm.Flat, lower=0.0)("sigma")
    y_obs = pm.Normal("y_obs", alpha, sigma, observed=y)

# %%
with m8_2:
    # Likelihood
    y = pm.Normal("y", mu, sigma=sigma, observed=log_electricity, dims="obs_id")

# Fitting
with partial_pooling_2:
    approx = pm.fit(n=50000,
                    method='fullrank_advi',
                    callbacks=[CheckParametersConvergence(tolerance=0.01)])

partial_pooled_loo = az.loo(partial_pooling_trace, partial_pooling_2)
partial_pooled_waic = az.waic(partial_pooling_trace, partial_pooling_2)

with pm.Model(coords=coords_2) as no_pooling_2:
    profile_cluster_idx = pm.Data("profile_cluster_idx", clusters, dims="obs_id")
    heat_temp_cluster_idx = pm.Data("heat_temp_cluster_idx", heat_clusters, dims="obs_id")
    cool_temp_cluster_idx = pm.Data("cool_temp_cluster_idx", cool_clusters, dims="obs_id")
    daypart = pm.Data("daypart", dayparts, dims="obs_id")
    fs_sin_1 = pm.Data("fs_sin_1", daypart_fs_sin_1, dims="obs_id")
    fs_sin_2 = pm.Data("fs_sin_2", daypart_fs_sin_2, dims="obs_id")
## Pareto smoothed importance sampling leave-one-out cross validation

[Cross-validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)) (CV) is another method of estimating out-of-sample prediction accuracy. It requires re-fitting a model many times, each time excluding a portion of the data; the excluded portion is then used to measure the accuracy of the model. This process is repeated many times, and the estimated accuracy of the model is the average over the runs. Then the entire dataset is used to fit the model one more time, and this is the model used for further analysis and/or predictions. Leave-one-out cross-validation (LOO-CV) is the particular case of cross-validation in which the excluded portion is a single data point.

As CV can be quite time-consuming (especially for Bayesian models), it is worth noting that LOO-CV can be approximated without re-fitting the model. A practical and computationally efficient way to do so uses a combination of strategies that includes [Pareto smoothed importance sampling](https://arxiv.org/abs/1507.02646). The resulting method is known as PSIS-LOO-CV which, while very useful, has a very complicated name, so we just call it LOO.

Although LOO and WAIC are computed in different ways, asymptotically they converge to the same numerical value, and in practice they generally agree. The main advantage of LOO is that it is more informative: it provides [useful diagnostics](https://arxiv.org/abs/1507.04544) and other goodies such as effective sample size and Monte Carlo standard error estimates.

Using ArviZ, both LOO and WAIC can be computed just by calling a function. Let's try it on an arbitrary pre-loaded model:

# change this to some good example
model0 = az.load_arviz_data('regression1d')
model1 = az.load_arviz_data('regression1d')

az.waic(model0)

az.loo(model0)

As you can see, both WAIC and LOO return similar values.

## The compare function

ArviZ also comes equipped with the `compare(.)` function, which is more convenient than calling `loo(.)` or `waic(.)` on each model separately. It takes a dictionary of names (keys) and models (values) as input and returns a DataFrame ordered (row-wise) from best to worst model.

cmp = az.compare({"m0": model0, "m1": model1})
cmp
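To make the "useful diagnostics" point above concrete, here is a minimal sketch, reusing the same pre-loaded example. It assumes a reasonably recent ArviZ release in which pointwise LOO exposes the Pareto shape parameters as `pareto_k` and `plot_khat` accepts the LOO result directly:

import arviz as az

idata = az.load_arviz_data('regression1d')

# pointwise=True keeps the per-observation elpd values and the Pareto k diagnostics
loo_pw = az.loo(idata, pointwise=True)
print(loo_pw)            # summary: elpd_loo, p_loo, standard error, warning flag
print(loo_pw.pareto_k)   # one k per observation; values above ~0.7 flag unreliable estimates

# visual check of the k values
az.plot_khat(loo_pw)

Large k values typically correspond to highly influential observations, which is exactly the kind of per-observation diagnostic WAIC does not provide.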
def compare_models(df, models: dict, extra_model_args: list = None,
                   parallel=False, plotose=False, **kwargs):
    """
    kwargs are forwarded to split_train_predict->fit_numpyro

    compare_models(models={'Hier': bayes.Numpyro.model_hier,
                           'Hier+covariance': bayes.Numpyro.model_hier_covar,
                           'Twostep Exponential': bayes.TwoStep.model_twostep,
                           'Twostep Gamma': bayes.TwoStep.model_twostep,
                           },
                   data=[df, df, df_monster, df_monster],
                   extra_args=[{}, {}, {'prior': 'Exponential'}, {'prior': 'Gamma'}])
    """
    # TODO save all model args in BayesWindow in self
    # Calculate
    extra_model_args = extra_model_args or np.tile({}, len(models))
    if parallel:
        traces = Parallel(n_jobs=min(os.cpu_count(), len(models)))(
            delayed(split_train_predict)(df, model, num_chains=1, **kwargs, **extra_model_arg)
            for model, extra_model_arg in zip(models.values(), extra_model_args))
    else:
        traces = [
            split_train_predict(df, model, y=kwargs['y'], **extra_model_arg)
            for model, extra_model_arg in zip(tqdm(models.values()), extra_model_args)
        ]

    # save to dict
    traces_dict = {}  # initialize results
    for key, trace in zip(models.keys(), traces):
        traces_dict[key] = trace

    # Plot
    if plotose:
        for trace_name, trace in traces_dict.items():
            # Plot PPC
            az.plot_ppc(
                trace,
                # flatten=[treatment],
                # flatten_pp=data_cols[2],
                mean=False,
                # num_pp_samples=1000,
                # kind='cumulative'
            )
            plt.title(trace_name)
            plt.show()
            r2(trace)
            # Weird that r2=1

            # WAIC
            try:
                print('======= WAIC (higher is better): =========')
                print(az.waic(trace, pointwise=True))
                print(az.waic(trace, var_name='y'))
            except TypeError:
                pass

    try:
        for trace_name in traces_dict.keys():
            trace = traces_dict[trace_name]
            # Print diagnostics and effect size
            print(f"n(Divergences) = {trace.sample_stats.diverging.sum(['chain', 'draw']).values}")
            try:
                slope = trace.posterior['v_mu'].sel({'v_mu_dim_0': 1}).mean(['chain']).values
            except Exception:
                slope = trace.posterior['b'].mean(['chain']).values
            print(f'Effect size={(slope.mean() / slope.std()).round(2)} == {trace_name}')
    except Exception:
        pass

    model_compare = az.compare(traces_dict)  # , var_name='y')
    az.plot_compare(model_compare, textsize=12, show=True)
    return model_compare