Пример #1
0
def rank_bayesian(data, alpha, verbose, all_normal, order, rope, rope_mode,
                  nsamples, effect_size):
    """
    Compare every pair of columns of ``data`` with a Bayesian test.

    The result skeleton is built by ``_create_result_df_skeleton`` (ordered by
    'mean' when ``all_normal`` is true, otherwise by 'median') and receives
    ``alpha`` divided by the number of columns of ``data``. The 'meanrank'
    column is dropped and the columns 'p_equal', 'p_smaller' and 'decision'
    are added. Those three columns are only filled for comparisons against
    the first row of the result; the first row itself keeps NaN / 'NA'.

    :param data: DataFrame with one column per population.
    :param alpha: Threshold handed to ``_posterior_decision`` (unadjusted);
        ``alpha / len(data.columns)`` is handed to the skeleton builder.
    :param verbose: Currently unused by this function.
    :param all_normal: Selects 'mean' ordering and ``_pooled_std`` when true,
        'median' ordering and ``_pooled_mad`` otherwise.
    :param order: Forwarded to ``_create_result_df_skeleton``.
    :param rope: Region of practical equivalence; used as is when
        ``rope_mode`` is 'absolute', multiplied with the pooled spread of the
        pair when ``rope_mode`` is 'effsize'.
    :param rope_mode: Either 'effsize' or 'absolute'.
    :param nsamples: Forwarded to ``two_on_multiple``.
    :param effect_size: Forwarded to ``_create_result_df_skeleton``.
    :return: ``_BayesResult(result_df, posterior_matrix, decision_matrix,
        effsize_method)``. ``posterior_matrix`` is only filled above the
        diagonal; ``decision_matrix`` is filled on both sides, the lower side
        using the reversed posterior probabilities.
    :raises ValueError: If a pair is compared and ``rope_mode`` is neither
        'effsize' nor 'absolute'.
    """
    # TODO check if some outputs for the verbose mode would be helpful
    order_column = 'mean' if all_normal else 'median'
    result_df, effsize_method = _create_result_df_skeleton(
        data, alpha / len(data.columns), all_normal, order, order_column,
        effect_size)
    result_df = result_df.drop('meanrank', axis='columns')
    for column, initial in (('p_equal', np.nan),
                            ('p_smaller', np.nan),
                            ('decision', 'NA')):
        result_df[column] = initial

    # bring the data columns into the same order as the rows of the result
    reordered_data = data.reindex(result_df.index, axis=1)
    populations = reordered_data.columns

    posterior_matrix = pd.DataFrame(index=populations, columns=populations)
    decision_matrix = pd.DataFrame(index=populations, columns=populations)

    for first in range(len(data.columns)):
        for second in range(first + 1, len(populations)):
            sample_first = reordered_data.iloc[:, first]
            sample_second = reordered_data.iloc[:, second]

            if rope_mode == 'effsize':
                # half the size of a small effect size following Kruschke (2018)
                pooled_spread = _pooled_std if all_normal else _pooled_mad
                cur_rope = rope * pooled_spread(sample_first, sample_second)
            elif rope_mode == 'absolute':
                cur_rope = rope
            else:
                raise ValueError(
                    "Unknown rope_mode method, this should not be possible.")

            posterior_probabilities = two_on_multiple(x=sample_first,
                                                      y=sample_second,
                                                      rope=cur_rope,
                                                      nsamples=nsamples)

            posterior_matrix.iloc[first, second] = posterior_probabilities
            decision_matrix.iloc[first, second] = _posterior_decision(
                posterior_probabilities, alpha)
            # mirrored cell: same comparison seen from the other population
            decision_matrix.iloc[second, first] = _posterior_decision(
                posterior_probabilities[::-1], alpha)

            if first != 0:
                continue

            # comparison with "best" (the first row of the result)
            row_label = result_df.index[second]
            result_df.loc[row_label, 'p_equal'] = posterior_probabilities[1]
            result_df.loc[row_label, 'p_smaller'] = posterior_probabilities[0]
            result_df.loc[row_label, 'decision'] = _posterior_decision(
                posterior_probabilities, alpha)

    return _BayesResult(result_df, posterior_matrix, decision_matrix,
                        effsize_method)
Пример #2
0
def bayes_wins(a, b, width=0.1, independant=False, score=False):
    """ Decide whether a beats b according to a Bayesian comparison.

    Both ballots are converted to numpy arrays and handed to
    ``two_on_multiple`` (when ``independant`` is truthy) or to
    ``two_on_single`` (otherwise), which yield three values unpacked here as
    the probability for a, for a tie, and for b.

    Args:
        a: Ballot representing one candidate (array-like).
        b: Ballot representing one candidate (array-like).
        width: the width of the region of practical equivalence, passed on
            as ``rope``.
        independant: Selects the test: truthy uses ``two_on_multiple``,
            falsy uses ``two_on_single``.
            NOTE(review): presumably truthy means the scores are independent
            and falsy means they are correlated (e.g. bootstraps or
            cross-validation scores) -- confirm against the test library.
        score: If True, returns the probability of a winning instead of a
            boolean.

    Returns:
        The probability for a when ``score`` is truthy; otherwise whether
        that probability equals the largest of the three probabilities.
    """
    ballot_a = np.array(a)
    ballot_b = np.array(b)

    compare = two_on_multiple if independant else two_on_single
    p_a, p_tie, p_b = compare(ballot_a, ballot_b, rope=width)

    if score:
        return p_a
    # a "wins" when no other outcome is more probable (ties count as a win)
    return p_a == max([p_a, p_tie, p_b])
Пример #3
0
# Demo script: loads classifier scores with get_data and runs several
# comparisons from the `bc` module, printing probabilities and drawing plots.
# NOTE(review): `bc`, `plt` and `get_data` are defined outside this snippet;
# `bc` is presumably baycomp and `plt` matplotlib.pyplot -- confirm.

# NOTE(review): these two results are overwritten just below before being
# used anywhere in this snippet.
data_nbc = get_data("nbc", "squash-unstored")
data_aode = get_data("aode", "squash-unstored")

# Reload with aggregate=True; these are the values used by the SignTest calls.
data_nbc = get_data("nbc", aggregate=True)
data_aode = get_data("aode", aggregate=True)
# NOTE(review): data_j48 is not used anywhere in this snippet.
data_j48 = get_data("j48", aggregate=True)

# SignTest through an instance: print its probabilities, then plot it.
# The third positional argument (1) is presumably the rope -- confirm.
t = bc.SignTest(data_nbc, data_aode, 1)
print(t.probs())
t.plot()

# The same SignTest comparison, called on the class instead of an instance.
print(bc.SignTest.probs(data_nbc, data_aode, 1))
bc.SignTest.plot(data_nbc, data_aode, 1)

# Reload without extra arguments for the calls that take runs=10.
data_nbc = get_data("nbc")
data_aode = get_data("aode")
print(bc.two_on_multiple(data_nbc, data_aode, 0.1, runs=10))
# NOTE(review): this `sample` is reassigned below without being read.
sample = bc.HierarchicalTest.sample(data_nbc, data_aode, 0.3, runs=10)
bc.HierarchicalTest.plot(data_nbc,
                         data_aode,
                         0.3,
                         runs=10,
                         names=("nbc", "aode"))

# HierarchicalTest through an instance: plot it, then print its probabilities.
sample = bc.HierarchicalTest(data_nbc, data_aode, 0.3, runs=10)
sample.plot(names=("nbc", "aode"))
print(sample.probs())

# Display the figures created by the plot calls above.
plt.show()