Example No. 1
    def get_raw_probs(self, k, n, x, a, Stat):
        if (n, x, a) not in self.rawProbs:
            #for B1, polymorphism is 1, substitution is 0
            if Stat == "B1":
                #sanity check
                assert set(k) == set([0, 1])
                b = self._get_b(x, a)
                dist = betabinom(n, a, b)
                # substitution (k == 0): derived allele fixed in the sample;
                # polymorphism (k == 1): neither lost (0) nor fixed (n)
                probs = np.where(k == 0,
                                 dist.pmf(n),
                                 1. - dist.pmf(0) - dist.pmf(n))
                self.rawProbs[(n, x, a)] = probs

            elif Stat == "B2" or Stat == 'B0':
                self.rawProbs[(n, x,
                               a)] = self._get_betabinom_probs(k, n, x, a)

            elif Stat == "B2maf" or Stat == 'B0maf':
                probs = self._get_betabinom_probs(
                    k, n, x, a) + self._get_betabinom_probs(n - k, n, x, a)
                #n/2 double counted if n is even
                if n % 2 == 0:
                    probs = np.where(k == int(n / 2), probs / 2, probs)

                self.rawProbs[(n, x, a)] = probs

        return self.rawProbs[(n, x, a)]
Example No. 2
    def fit(self):
        """Fits the null and alternative models."""

        if not self.binomial:
            # attempt Beta-Binomial parameter estimation
            data = np.hstack([self.a, self.d - self.a])
            self.alpha1, self.niter1 = fit_polya(data=data)
            if np.isinf(self.alpha1).all() or (self.alpha1 == 0).all():
                self.binomial = True
                msg = 'Overdispersion estimate out of bounds.'
                msg += ' Reverting to Binomial LRT.'
                print(msg)

        # fit null model and evaluate negative log-likelihoods for both models
        if not self.binomial:
            # fit overdispersion parameter
            m0 = np.asarray([self.m0, 1 - self.m0])
            s0, self.niter0 = fit_polya_precision(data, m=m0)
            self.alpha0 = reparameterize_polya_ms(m0, s0)

            self.nll0 = -betabinom(n=self.d,
                                   a=self.alpha0[0],
                                   b=self.alpha0[1]).logpmf(self.a).sum()
            self.nll1 = -betabinom(n=self.d,
                                   a=self.alpha1[0],
                                   b=self.alpha1[1]).logpmf(self.a).sum()
        else:
            # no overdispersion estimate desired / possible, estimate mean
            self.m1 = self.a.sum() / self.d.sum()
            self.nll0 = -binom(n=self.d, p=self.m0).logpmf(self.a).sum()
            self.nll1 = -binom(n=self.d, p=self.m1).logpmf(self.a).sum()
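
A possible follow-up, not part of the original example: the two negative log-likelihoods computed above can be turned into a likelihood-ratio test. `model` is a hypothetical fitted instance of the class this method belongs to, and one degree of freedom assumes the null model only fixes the mean.

from scipy.stats import chi2

lrt = 2 * (model.nll0 - model.nll1)   # LRT statistic from the fits above
p_value = chi2(df=1).sf(lrt)          # chi-square(1) survival function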
Example No. 3
def plot_beta_binomial_fit(data,
                           fit_results,
                           title=None,
                           x_label=None,
                           x_range=None,
                           y_range=None,
                           fig_size=(6, 5),
                           bin_width=1,
                           filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "a", "b", "n", and "loc"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range
        (the histogram shows probabilities, so the upper value of y_range should not exceed 1).
    :param fig_size: (tuple) figure size
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """

    plot_fit_discrete(data=data,
                      dist=stat.betabinom(n=fit_results['n'],
                                          a=fit_results['a'],
                                          b=fit_results['b'],
                                          loc=fit_results['loc']),
                      label='Beta-Binomial',
                      bin_width=bin_width,
                      title=title,
                      x_label=x_label,
                      x_range=x_range,
                      y_range=y_range,
                      fig_size=fig_size,
                      filename=filename)
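
A minimal usage sketch with synthetic data and hypothetical parameter values; it assumes scipy.stats is imported as `stat` (as the function body does) and that `plot_fit_discrete` is importable from the same module.

import scipy.stats as stat

data = stat.betabinom(n=20, a=2, b=5).rvs(size=500, random_state=0)
plot_beta_binomial_fit(data,
                       fit_results={'a': 2, 'b': 5, 'n': 20, 'loc': 0},
                       title='Beta-Binomial fit',
                       x_label='k')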
Example No. 4
def compute_bb_nll(a, d, mu, theta):
    """Computes negative log-likelihood for Beta-Binomial model.

    Covers limit cases theta = 0 (Binomial) and theta = inf (Bernoulli).

    Args:
        a: Vector of successes.
        d: Vector of trials.
        mu: Mean of the distribution. Has to be the same shape as a and d.
        theta: Dispersion parameter.

    Returns:
        Negative log-likelihood.
    """
    a = atleast_2d_column(a)
    d = atleast_2d_column(d)
    mu = atleast_2d_column(mu)

    if (mu > 1).any() or (mu < 0).any():
        raise ValueError('mu has to be between 0 and 1.')
    if a.size != d.size or a.size != mu.size:
        raise ValueError('a, d and mu have to be of the same size.')
    if theta < 0:
        raise ValueError('theta has to be non-negative.')

    if theta == 0:
        nll = -binom(n=d, p=mu).logpmf(a).sum()
    elif np.isinf(theta):
        nll = -binom(n=d > 0, p=mu).logpmf(a > 0).sum()
    else:
        alpha = reparameterize_polya_ms(np.hstack([mu, 1 - mu]), 1 / theta)
        nll = -betabinom(n=d,
                         a=alpha[:, 0, np.newaxis],
                         b=alpha[:, 1, np.newaxis]).logpmf(a).sum()
    return nll
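
A minimal usage sketch with hypothetical counts, exercising the three regimes named in the docstring; it assumes the module-level helpers `atleast_2d_column` and `reparameterize_polya_ms` used above are available.

import numpy as np

a = np.array([3, 5, 2])        # successes
d = np.array([10, 10, 10])     # trials
mu = np.full(3, 0.4)           # per-observation mean

print(compute_bb_nll(a, d, mu, theta=0))       # Binomial limit
print(compute_bb_nll(a, d, mu, theta=0.1))     # Beta-Binomial
print(compute_bb_nll(a, d, mu, theta=np.inf))  # Bernoulli limit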
Example No. 5
    def __call__(self, batch_size, n_possible_points):
        if self.range_indcs is not None:
            n_possible_points = self.range_indcs[1] - self.range_indcs[0]

        if np.random.uniform(size=1) < self.proba_uniform:
            # sample the number of indices uniformly instead of using a and b
            n_indcs = random.randint(0, n_possible_points)

        else:
            if self.is_beta_binomial:
                rv = betabinom(n_possible_points, self.a, self.b)
                n_indcs = rv.rvs()

            else:
                a = ratio_to_int(self.a, n_possible_points)
                b = ratio_to_int(self.b, n_possible_points)
                n_indcs = random.randint(a, b)

        if self.is_ensure_one and n_indcs < 1:
            n_indcs = 1

        if self.is_batch_share:
            indcs = torch.randperm(n_possible_points)[:n_indcs]
            indcs = indcs.unsqueeze(0).expand(batch_size, n_indcs)
        else:
            indcs = (np.arange(n_possible_points).reshape(
                1, n_possible_points).repeat(batch_size, axis=0))
            indep_shuffle_(indcs, -1)
            indcs = torch.from_numpy(indcs[:, :n_indcs])

        if self.range_indcs is not None:
            # adding is the same as shifting
            indcs += self.range_indcs[0]

        return indcs
Example No. 6
    def test_betabinom(self):
        from scipy.stats import betabinom
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1)

        n, a, b = 5, 2.3, 0.63
        mean, var, skew, kurt = betabinom.stats(n, a, b, moments='mvsk')

        x = np.arange(betabinom.ppf(0.01, n, a, b),
                      betabinom.ppf(0.99, n, a, b))
        ax.plot(x,
                betabinom.pmf(x, n, a, b),
                'bo',
                ms=8,
                label='betabinom pmf')
        ax.vlines(x, 0, betabinom.pmf(x, n, a, b), colors='b', lw=5, alpha=0.5)

        rv = betabinom(n, a, b)
        ax.vlines(x,
                  0,
                  rv.pmf(x),
                  colors='k',
                  linestyles='-',
                  lw=1,
                  label='frozen pmf')
        ax.legend(loc='best', frameon=False)
        #        plt.show()
        self.assertEqual("AxesSubplot(0.125,0.11;0.775x0.77)", str(ax))
Example No. 7
def test_betabinom_a_and_b_unity():
    # test limiting case that betabinom(n, 1, 1) is a discrete uniform
    # distribution from 0 to n
    n = 20
    k = np.arange(n + 1)
    p = betabinom(n, 1, 1).pmf(k)
    expected = np.repeat(1 / (n + 1), n + 1)
    assert_almost_equal(p, expected)
Example No. 8
def test_betabinom_bernoulli():
    # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
    a = 2.3
    b = 0.63
    k = np.arange(2)
    p = betabinom(1, a, b).pmf(k)
    expected = bernoulli(a / (a + b)).pmf(k)
    assert_almost_equal(p, expected)
Example No. 9
def beta_binomial_prior_distribution(phoneme_count, mel_count, scaling_factor=1.0):
    x = np.arange(0, phoneme_count)
    mel_text_probs = []
    for i in range(1, mel_count + 1):
        a, b = scaling_factor * i, scaling_factor * (mel_count + 1 - i)
        mel_i_prob = betabinom(phoneme_count, a, b).pmf(x)
        mel_text_probs.append(mel_i_prob)
    return np.array(mel_text_probs)
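
For intuition, a hypothetical call: each row i of the result is the Beta-Binomial pmf over phoneme positions for mel frame i, so the matrix forms a soft, roughly diagonal alignment prior.

prior = beta_binomial_prior_distribution(phoneme_count=40, mel_count=100)
print(prior.shape)        # (100, 40)
print(prior[0].argmax())  # 0: early frames concentrate on early phonemes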
Example No. 10
def beta_binomial_prior_distribution(phoneme_count, mel_count):
    P, M = phoneme_count, mel_count
    x = np.arange(0, P)
    mel_text_probs = []
    for i in range(1, M + 1):
        a, b = i, M + 1 - i
        rv = betabinom(P, a, b)
        mel_i_prob = rv.pmf(x)
        mel_text_probs.append(mel_i_prob)
    return torch.tensor(np.array(mel_text_probs))
Example No. 11
def design_prior_filter(a, b, n):
    from scipy.stats import betabinom
    beta = betabinom(a=a, b=b, n=n)
    taps = beta.pmf([i for i in range(n, -1, -1)])
    filter_length = 2 * (len(taps) - 1) + 1

    filter_coef = np.zeros((filter_length, 1, 1))
    filter_coef[:(n + 1), 0, 0] = taps

    print(f'prior filter: {filter_coef.flatten()}')

    return filter_coef
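
A hypothetical usage sketch: the returned array is shaped as 1-D convolution weights, with the first n + 1 taps following the reversed Beta-Binomial pmf and the remaining taps zero.

coef = design_prior_filter(a=2.0, b=5.0, n=8)
print(coef.shape)  # (17, 1, 1): filter_length = 2 * n + 1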
Example No. 12
    def plot(self, **kwargs):
        x = np.arange(
            betabinom.ppf(0.01, **self.parameters),
            betabinom.ppf(0.99, **self.parameters),
        )
        plt.vlines(
            x,
            0,
            betabinom(**self.parameters).pmf(x),
            label='pmf',
            **kwargs,
        )
Example No. 13
    def _reset_distribution(self):
        self._distribution: rv_discrete = betabinom(
            n=self._n, a=self._alpha, b=self._beta
        )
Example No. 14
    def _get_betabinom_probs(self, k, n, x, a):
        if (n, x, a) not in self.betabinomprobs:
            b = self._get_b(x, a)
            self.betabinomprobs[(n, x, a)] = betabinom(n, a, b)

        return self.betabinomprobs[(n, x, a)].pmf(k)
Example No. 15
    msa = trim_terminals(
        read_fasta(f'../../ortho_MSA/realign_hmmer1/out/{OGid}.mfa'))

    # Create emission sequence
    emits = []
    for j in range(len(msa[0][1])):
        col = [1 if msa[i][1][j] in ['-', '.'] else 0 for i in range(len(msa))]
        emits.append(sum(col))

    # Instantiate model
    e_dists_rv = {}
    start_e_dists_rv = {}
    for state in ['1A', '3']:
        a, b = params['e_dists'][state]
        e_dists_rv[state] = betabinom_frozen(len(msa) - 1, a, b)
        start_e_dists_rv[state] = stats.betabinom(len(msa) - 1, a, b)
    for state in ['1B', '2']:
        a0, b0, a1, b1 = params['e_dists'][state]
        e_dists_rv[state] = ar1_betabinom_frozen(len(msa) - 1, a0, b0, a1, b1)
        start_e_dists_rv[state] = get_stationary_dist(
            len(msa) - 1, a0, b0, a1, b1)
    model = hmm.ARHMM(params['t_dists'], e_dists_rv, params['start_t_dist'],
                      start_e_dists_rv)

    # Decode states and plot
    fbs = model.forward_backward(emits)
    draw.plot_msa_lines([seq.upper() for _, seq in msa],
                        [fbs['1A'], fbs['2'], fbs['3'], fbs['1B']],
                        figsize=(15, 6))
    plt.savefig(f'out/{OGid}_wide.png', bbox_inches='tight')
    plt.close()
Example No. 16
    def __init__(self, n, a, b):
        self.n = n
        self.a = a
        self.b = b
        self.dist = stats.betabinom(n, a, b)
Example No. 17
def fit_betamix(X_N, X, num_betas, debug=False):
    # Thetas
    N = X_N.max()
    thetas = np.random.randint(low=2, high=10, size=(num_betas, 2))
    # Mixing coefficients:
    alphas = np.random.rand(num_betas)
    alphas /= alphas.sum()

    ROUNDS = 200 if not debug else 1
    QUIET = True
    lls = []
    for r in range(ROUNDS):
        dists = [betabinom(X_N, *theta) for theta in thetas]
        # E Step
        memberships = np.stack([a * d.pmf(X) for a, d in zip(alphas, dists)])
        memberships = memberships / memberships.sum(axis=0)

        # M Step
        fit_args = {
            'samples_N': X_N,
            'samples': X,
            'weights': memberships,
            'guess': thetas,
        }

        for i in range(MAX_RETRIES):
            new_thetas = fit_betabinom(**fit_args)
            fit_args['guess'] = np.random.randint(low=2,
                                                  high=10,
                                                  size=(num_betas, 2))

            success = new_thetas is not None
            if success:
                break

            print('RETRYING...')

        if not success:
            if len(lls) > 1:
                den = np.max([lls[-1], lls[-2]])
                rel_improv = np.abs(lls[-1] - lls[-2]) / den
            else:
                rel_improv = float('inf')

            if rel_improv < 0:  # REL_IMPROVE_THRESH comparison disabled
                # if we have just hit convergence early
                new_thetas = thetas
            else:
                uid = uuid4()
                fn = f'err_{uid}_{num_betas}.pt'
                p = f'/data/theory/robustopt/engstrom/store/bayes/{fn}'

                fit_args.update({'round': r, 'lls': np.array(lls)})

                ch.save(fit_args, p)
                raise ValueError('Opt did not converge!')

        thetas = new_thetas
        likelihood = fit_betabinom(X_N,
                                   X,
                                   memberships,
                                   guess=thetas,
                                   optimize=False)
        lls.append(likelihood)
        alphas = memberships.sum(axis=1) / memberships.sum()

    with np.printoptions(precision=2, suppress=True):
        print('-----')
        print(f"Round {r} done")
        print(f"Theta: \n {np.array(thetas)}")
        print(f"Alpha: {alphas}")
        print(f"Likelihood: {likelihood}")

    ll = likelihood
    ret = BBinomialMixture(N, thetas, alphas), BMixture(thetas, alphas), ll
    return ret
Example No. 18
    def _reset_distribution(self):
        self._distribution = betabinom(self._n, self._alpha, self._beta)
Example No. 19
    def _f(params):
        a, b = params
        pmf_val = betabinom(samples_N, a, b).logpmf(samples)
        assert pmf_val.shape == w.shape
        return -(pmf_val * w).mean()
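
This fragment closes over `samples_N`, `samples`, and the weights `w`. A self-contained sketch (hypothetical data, not the original fitting code) of how such a weighted objective might be minimized:

from scipy.optimize import minimize
from scipy.stats import betabinom
import numpy as np

samples_N = 20                          # number of trials
samples = np.array([3, 5, 8, 2])        # observed successes
w = np.ones_like(samples, dtype=float)  # per-sample weights

def _f(params):
    a, b = params
    pmf_val = betabinom(samples_N, a, b).logpmf(samples)
    assert pmf_val.shape == w.shape
    return -(pmf_val * w).mean()

res = minimize(_f, x0=[2.0, 2.0], bounds=[(1e-6, None)] * 2)
print(res.x)  # fitted (a, b)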
Example No. 20
    def pmf(self, samples):
        total_pmf = 0.0
        for (a, b), alpha in zip(self.thetas, self.alphas):
            dist = betabinom(self.N, a, b)
            total_pmf += alpha * dist.pmf(samples)
        return total_pmf
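
A hedged usage sketch, assuming the constructor stores N, thetas, and alphas as attributes (as the call in Example No. 17 suggests):

import numpy as np

mix = BBinomialMixture(20, [(2.0, 5.0), (5.0, 2.0)], [0.5, 0.5])
print(mix.pmf(np.arange(21)).sum())  # ~1.0: the mixture pmf over 0..N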