def __init__(self, α: np.ndarray, z: np.ndarray, bor: float):
    """
    Store the summary quantities derived from a fitted Dirichlet posterior.

    :param α: sufficient statistics of the posterior Dirichlet density on
        model/family frequencies
    :param z: posterior probabilities for each subject to belong to each
        model/family
    :param bor: Bayesian omnibus risk p(y|H0)/(p(y|H0)+p(y|H1))
    """
    # Keep a private copy so later changes to the caller's array do not
    # leak into this object.
    self.attribution = z.copy()

    # First two moments of the frequencies under Dirichlet(α).
    self.frequency_mean = dirichlet.mean(α)
    self.frequency_var = dirichlet.var(α)

    # Exceedance probability is computed from the frozen distribution.
    frozen_posterior = dirichlet(α)
    self.exceedance_probability = exceedance_probability(frozen_posterior)

    # (7): blend the exceedance probability with the uniform value
    # 1 / len(α), weighted by the omnibus risk.
    weight_alternative = 1 - bor
    uniform_share = bor / len(α)
    self.protected_exceedance_probability = (
        self.exceedance_probability * weight_alternative + uniform_share
    )
def test_frozen_dirichlet():
    """Check that a frozen ``dirichlet`` matches the unfrozen function API.

    A random dimension and concentration vector are drawn from a fixed seed;
    ``var``, ``mean`` and ``entropy`` are compared once, then ``pdf`` and
    ``logpdf`` are compared at several random points on the simplex.
    """
    np.random.seed(2846)

    dim = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, dim)
    frozen = dirichlet(alpha)

    # Moment / entropy accessors take no argument on the frozen object.
    assert_equal(frozen.var(), dirichlet.var(alpha))
    assert_equal(frozen.mean(), dirichlet.mean(alpha))
    assert_equal(frozen.entropy(), dirichlet.entropy(alpha))

    num_tests = 10
    for _ in range(num_tests):
        point = np.random.uniform(10e-10, 100, dim)
        point /= np.sum(point)  # normalise onto the simplex
        # The last coordinate is dropped before evaluation.
        reduced = point[:-1]
        assert_equal(frozen.pdf(reduced), dirichlet.pdf(reduced, alpha))
        assert_equal(frozen.logpdf(reduced), dirichlet.logpdf(reduced, alpha))
# NOTE(review): n_samples is not referenced in this fragment; presumably it is
# read by get_samples -- confirm against its definition.
n_samples = 10000

# Observed data: one count per category identifier.
count_obs = OrderedDict([('id1', 87), ('id2', 34), ('id3', 1)])
counts = np.fromiter(count_obs.values(), dtype=int)

# Uninformative prior expressed as one pseudo-count per category; the
# posterior parameters are the prior pseudo-counts plus the observed counts.
dirichlet_prior = np.ones_like(counts)
dirichlet_posterior = dirichlet_prior + counts

prior_samples = get_samples(dirichlet_prior)
posterior_samples = get_samples(dirichlet_posterior)

# Report the moments of the prior and posterior.
print('prior means: %s' % (dlt.mean(dirichlet_prior),))
PoM = dlt.mean(dirichlet_posterior)
print('posterior means: %s' % (PoM,))
PoV = dlt.var(dirichlet_posterior)
print('posterior variances: %s' % (PoV,))

# Expected from the value counts plus the assumed prior counts.
print('naive posterior means: %s' % ((counts + 1) / (counts + 1).sum(),))

print('Entropy DLT prior:', dlt.entropy(dirichlet_prior))
print('Entropy DLT posterior:', dlt.entropy(dirichlet_posterior))

if plot_priors:
    plt.figure(figsize=(9, 6))
    # One histogram per category, taken column-wise from the prior samples.
    for i, label in enumerate(count_obs):
        ax = plt.hist(
            prior_samples[:, i],
            bins=50,
            density=True,
            alpha=.35,
            label=label,
            histtype='stepfilled',
        )
def compute_LPV_from_parameters(alpha_vector, *, z_score=1.65):
    """Return a per-component lower bound for a Dirichlet, clamped at zero.

    The bound is ``mean - z_score * sqrt(variance)``, using the component
    means and variances that ``dlt.mean`` and ``dlt.var`` report for
    ``alpha_vector``. Negative bounds are replaced by ``0``.

    :param alpha_vector: concentration parameters passed to ``dlt.mean`` and
        ``dlt.var``
    :param z_score: number of standard deviations subtracted from the mean.
        The default ``1.65`` is the value the original code labelled
        "5-percentile".
    :return: array of lower bounds, one per component, each ``>= 0``
    """
    M = dlt.mean(alpha_vector)
    V = dlt.var(alpha_vector)
    LPV = M - z_score * np.sqrt(V)
    # A proportion cannot be negative, so floor the bound at zero.
    return np.where(LPV < 0, 0, LPV)
def var(self):
    """Return ``dirichlet.var`` evaluated at this object's ``alpha``."""
    concentration = self.alpha
    return dirichlet.var(concentration)