Example #1
def update_data_emission_matrix_using_negative_binomial(
        d, dshared, phis, mus, data, index, timepoint):
    """
    Update the data emission matrix based on the negative binomial
    distribution.
    This function allows using a different phi and mu for every single state.
    phis: an array containing phi for every non-silent state
    mus:  an array containing mu for every non-silent state
    data: an array of integer observations
    """
    data_emission_matrix = d['data_emission_matrix']
    dictionary = {}
    for i in range(dshared['n_obs']):
        for j in range(dshared['silent_states_begin']):
            key = (phis[j], mus[j])
            if key not in dictionary:
                dictionary[key] = {}
            if data[i] not in dictionary[key]:
                # cache the pmf so repeated (phi, mu, count) lookups are free
                p = phis[j] / (mus[j] + phis[j])
                dictionary[key][data[i]] = nbinom.pmf(data[i], phis[j], p)
            data_emission_matrix[index][i][j] *= dictionary[key][data[i]]
    d['data_emission_matrix'] = data_emission_matrix
Example #2
def p_n1_pl_n2(n, theta, m, t1, t2):
    summ = 0
    p1 = theta / (t1 + theta)
    p2 = theta / (t2 + theta)
    for j in range(n + 1):
        summ += nbinom.pmf(j, m, p1) * nbinom.pmf(n - j, m, p2)
    return summ
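
A quick sanity check of this convolution (a sketch with illustrative values, assuming numpy and scipy are available): when the two exposure times coincide, the sum of two independent NB(m, p) variables is NB(2m, p), so the loop above collapses to a single pmf evaluation.

import numpy as np
from scipy.stats import nbinom

theta, m, t = 2.0, 3, 1.5   # illustrative values
p = theta / (t + theta)
for n in range(6):
    # with t1 == t2, p1 == p2, and the convolution equals NB(2m, p) at n
    assert np.isclose(p_n1_pl_n2(n, theta, m, t, t), nbinom.pmf(n, 2 * m, p))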
Example #3
def getllxtensor_singleroi(roi: str, data_path: str, fits_path: str,
                           models_path: str, model_name: str,
                           fit_format: int) -> np.ndarray:
    """Recompute a single log-likelihood tensor (n_samples x n_datapoints).

    Args:
        roi (str): A single ROI, e.g. "US_MI" or "Greece".
        data_path (str): Full path to the data directory.
        fits_path (str): Full path to the fits directory.
        models_path (str): Full path to the models directory.
        model_name (str): The model name (without the '.stan' suffix).
        fit_format (int): The .csv (0) or .pkl (1) fit format.

    Returns:
        np.ndarray: The log-likelihood tensor.
    """
    csv_path = Path(data_path) / ("covidtimeseries_%s_.csv" % roi)
    df = pd.read_csv(csv_path)
    t0 = np.where(df["new_cases"].values > 1)[0][0]
    y = df[['new_cases', 'new_recover', 'new_deaths']].to_numpy()\
        .astype(int)[t0:, :]
    # load samples
    samples = extract_samples(fits_path, models_path, model_name, roi,
                              fit_format)
    S = np.shape(samples['lambda[0,0]'])[0]
    # print(S)
    # get number of observations, check against data above
    for i in range(1000, 0, -1):  # Search for it from latest to earliest
        if 'lambda[%d,0]' % i in samples:
            N = i + 1  # N observations; add 1 since the index starts at 0
            break  # And move on
    else:
        raise ValueError("no 'lambda[i,0]' entries found in samples")
    print(N)  # run using old data
    print(len(y))
    llx = np.zeros((S, N, 3))
    # Conversion from Stan's neg_binom2(n_stan | mu, phi) to
    # scipy.stats.nbinom(k, n_scipy, p):
    #     n_scipy = phi,  p = phi / (phi + mu),  k = n_stan
    for i in range(S):
        phi = samples['phi'][i]
        for j in range(N):
            for c in range(3):  # new_cases, new_recover, new_deaths
                mu = max(samples['lambda[%d,%d]' % (j, c)][i], 1)
                llx[i, j, c] = np.log(
                    nbinom.pmf(max(y[j, c], 0), phi, phi / (phi + mu)))
        print(np.sum(llx[i, :, :]))
        print(samples['ll_'][i])
        print('--')
    return llx
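
The Stan-to-scipy conversion used above can be verified in isolation, since neg_binom2(mu, phi) has mean mu and variance mu + mu**2 / phi (a minimal sketch with illustrative values):

import numpy as np
from scipy.stats import nbinom

mu, phi = 12.0, 3.0   # illustrative values
n, p = phi, phi / (phi + mu)
assert np.isclose(nbinom.mean(n, p), mu)               # mean is mu
assert np.isclose(nbinom.var(n, p), mu + mu**2 / phi)  # variance is mu + mu^2/phi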
Example #4
def nll_glm(params, y, Xg, Dg=None, nzmean=None, diversity=None, dist="nb"):
    """ Negative log-likelihood of the ZINB-GLM model """
    eps = 1e-20  # A little epsilon to avoid errors
    pg = params[0]
    ag, yg = np.split(params[1:], 2)
    # Convert to compatible shapes, column vectors
    ag = ag.reshape(-1, 1)
    yg = yg.reshape(-1, 1)
    n_reads = y[y == 0].reshape(-1, 1)
    y_reads = y[y > 0].reshape(-1, 1)
    # Use the formulas to get distribution parameters
    n = np.exp(pg)  # Dispersion parameters
    # Mean is estimated using ag if there is no nzmean arg
    mu = np.exp(Xg.dot(ag))
    # Multiply mean offsets, ag estimates a ratio of the expected mean over non-zero mean
    if nzmean is not None: mu = np.multiply(mu, Dg.dot(nzmean.reshape(-1, 1)))
    # Extraneous is estimated by yg
    yg = Xg.dot(yg)
    # Add extraneous offset, yg estimates the change from the average diversity
    if diversity is not None:
        temp = Dg.dot(diversity.reshape(-1, 1))
        temp[temp <= 0] = eps
        temp[temp >= 1] = 1 - eps
        yg += np.log(temp / (1 - temp))  # logit; temp is clamped to (0, 1) above
    pi = 1 / (1 + np.exp(-yg))  # sigmoid, the inverse of the logit
    p = n / (mu + n)

    # ZINB (Equation 2)
    if dist == "nb":
        # Negative Binomial
        n_reads = pi[y == 0] + (1.0 - pi[y == 0]) * nbinom.pmf(
            n_reads, n, p[y == 0])
        y_reads = (1.0 - pi[y > 0]) * nbinom.pmf(y_reads, n, p[y > 0])
    elif dist == "norm":
        # Normal
        # "Normal Approximation to the Negative Binomial" by statisticsmatt
        # https://www.youtube.com/watch?v=JhmmbgLLVkQ
        var = n * (1 - p) / (p * p)
        sd = np.sqrt(var)
        n_reads = pi[y == 0] + (1.0 - pi[y == 0]) * norm.pdf(
            n_reads, mu[y == 0], sd[y == 0])
        y_reads = (1.0 - pi[y > 0]) * norm.pdf(y_reads, mu[y > 0], sd[y > 0])
    # Compute the negative log-likelihood
    """ https://stackoverflow.com/questions/5124376/convert-nan-value-to-zero/5124409 """
    # The stackoverflow answer was ok, but sometimes values are still 0
    # I added a little epsilon and everything seems to work better.
    n_reads = np.nan_to_num(np.log(n_reads + eps))
    y_reads = np.nan_to_num(np.log(y_reads + eps))
    nll = -(np.sum(n_reads) + np.sum(y_reads))
    return nll
Example #5
def compute_likelihood(failures_before_five_successes, theta1_range, theta2_range):
    """
        Computes the likelihood over the range (0, 1) for two theta parameters.
        The likelihood is modeled by a Negative Binomial pmf.
    """

    no_successes = 5

    likelihood_grid = np.zeros((len(theta1_range), len(theta2_range)))

    for x in range(len(theta1_range)):
        for y in range(len(theta2_range)):
            theta1 = theta1_range[x]
            theta2 = theta2_range[y]
            # p is constant across data points, so compute it once
            p = theta1 * theta2 + (1 - theta1) * (1 - theta2)

            total_log_likelihood = 0
            for data_point_failures in failures_before_five_successes:
                total_log_likelihood += np.log(nbinom.pmf(
                    data_point_failures, no_successes, p))

            likelihood_grid[x, y] = total_log_likelihood

    return np.exp(likelihood_grid)
Example #6
 def _logpmf(self, x, mu, alpha, p, w):
     s, p = self.convert_params(mu, alpha, p)
     return _lazywhere(x != 0, (x, s, p, w),
                       (lambda x, s, p, w: np.log(1. - w) +
                       nbinom.logpmf(x, s, p)),
                       np.log(w + (1. - w) *
                       nbinom.pmf(x, s, p)))
Example #7
    def coverage_probability(self, nr_obs, a, mean_lib, stddev_lib, z,
                             coverage_mean, read_len, s_inner, s_outer,
                             b=None, coverage_model=False):
        ''' Distribution P(o|c,z) for the prior probability over coverage.
            This probability distribution is implemented as a Poisson
            distribution.

            Attributes:

            c       -- coverage
            mean    -- mean value of poisson distribution.

            Returns probability P(c)

        '''
        if not b:
            # only one reference sequence.
            # We split the reference sequence into two equal
            # length sequences to fit the model.
            a, b = a / 2, a / 2

        param = Param(mean_lib, stddev_lib, coverage_mean, read_len,
                      s_inner, s_outer)
        lambda_ = mean_span_coverage(a, b, z, param)

        if coverage_model == 'Poisson':
            return poisson.pmf(nr_obs, lambda_, loc=0)

        elif coverage_model == 'NegBin':
            p = 0.01
            n = (p*lambda_)/(1-p)
            return nbinom.pmf(nr_obs, n, p, loc=0) 
        else:
            # This is equivalent to uniform coverage
            return 1 #uniform.pdf(nr_obs, loc=lambda_- 0.3*lambda_, scale=lambda_ + 0.3*lambda_ )
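
The 'NegBin' branch fixes p = 0.01 and picks n so the negative binomial keeps the Poisson mean lambda_ while inflating the variance to lambda_ / p; a quick check of that identity (a sketch with an illustrative lambda_):

import numpy as np
from scipy.stats import nbinom

lambda_ = 40.0   # illustrative span coverage
p = 0.01
n = (p * lambda_) / (1 - p)
assert np.isclose(nbinom.mean(n, p), lambda_)     # mean stays at lambda_
assert np.isclose(nbinom.var(n, p), lambda_ / p)  # variance is inflated 1/p-fold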
Example #8
def getloglikelihood2(kmat, mu_estimate, alpha, sumup=False, log=True):
    '''
    Get the log likelihood estimation of NB, using the current estimation of beta
    '''
    if kmat.shape[0] != mu_estimate.shape[0]:
        raise ValueError(
            'Count table dimension is not the same as mu vector dimension.')
    alpha = np.matrix(alpha).reshape(mu_estimate.shape[0],
                                     mu_estimate.shape[1])
    kmat_r = np.round(kmat)
    mu_sq = np.multiply(mu_estimate, mu_estimate)
    var_vec = mu_estimate + np.multiply(alpha, mu_sq)
    nb_p = np.divide(mu_estimate, var_vec)
    nb_r = np.divide(mu_sq, var_vec - mu_estimate)
    if log:
        logp = nbinom.logpmf(kmat_r, nb_r, nb_p)
    else:
        logp = nbinom.pmf(kmat_r, nb_r, nb_p)

    if np.isnan(np.sum(logp)):
        #raise ValueError('nan values for log likelihood!')
        logp = np.where(np.isnan(logp), 0, logp)
    if sumup:
        return np.sum(logp)
    else:
        return logp
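
The mean-dispersion parameterization used here (var = mu + alpha * mu**2) maps to scipy's (n, p) via n = mu**2 / (var - mu) and p = mu / var; a minimal round-trip check with illustrative values:

import numpy as np
from scipy.stats import nbinom

mu, alpha = 20.0, 0.3   # illustrative mean and over-dispersion
var = mu + alpha * mu**2
n, p = mu**2 / (var - mu), mu / var
assert np.isclose(nbinom.mean(n, p), mu)
assert np.isclose(nbinom.var(n, p), var)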
Example #9
def NBPara2Arr(param, n):
    # print param
    Arr = np.zeros((n, 1))
    for i in range(int(n)):
        Arr[i] = nbinom.pmf(i, 1, 1 - param)  # change failure prob
    Arr /= np.sum(Arr)
    return Arr
Example #10
    def test_single_squeezed_state_hafnian(self):
        """Test the sampling routines by comparing the photon number frequencies and the exact
        probability distribution of a single mode squeezed vacuum state
        """
        n_samples = 1000
        mean_n = 1.0
        r = np.arcsinh(np.sqrt(mean_n))
        sigma = np.array([[np.exp(2 * r), 0.0], [0.0, np.exp(-2 * r)]])

        n_cut = 10
        samples = hafnian_sample_state(sigma, samples=n_samples, cutoff=n_cut)
        bins = np.arange(0, max(samples) + 1, 1)
        (freq, _) = np.histogram(samples, bins=bins)
        nm = max(samples) // 2

        x = nbinom.pmf(np.arange(0, nm, 1), 0.5,
                       np.tanh(np.arcsinh(np.sqrt(mean_n)))**2)
        x2 = np.zeros(2 * len(x))
        x2[::2] = x
        rel_freq = freq[0:-1] / n_samples
        x2 = x2[0:len(rel_freq)]

        assert np.allclose(rel_freq,
                           x2,
                           atol=rel_tol / np.sqrt(n_samples),
                           rtol=rel_tol / np.sqrt(n_samples))
Example #11
def NBPara2Arr(param, n):
    # print param
    Arr = np.zeros((n, 1))
    for i in range(int(n)):
        Arr[i] = nbinom.pmf(i, 1, 1 - param)  # change failure prob
    Arr /= np.sum(Arr)
    return Arr
Example #12
 def _nbinom_pmf(self, value, log=False):
     if log:
         return nbinom.logpmf(value, self.no_of_successes,
                              self.prob_of_success)
     else:
         return nbinom.pmf(value, self.no_of_successes,
                           self.prob_of_success)
Example #13
 def plot(self, x, n, p):
     pmf = nbinom.pmf(x, n, p)
     plt.plot(x, pmf, 'o-')
     plt.title('Neg_Binomial: n=%i , p=%.2f' % (n, p), fontsize=12)
     plt.xlabel('Number of failures')
     plt.ylabel('Probability', fontsize=12)
     plt.show()
Example #14
 def _logpmf(self, x, mu, alpha, p, w):
     s, p = self.convert_params(mu, alpha, p)
     return _lazywhere(x != 0, (x, s, p, w),
                       (lambda x, s, p, w: np.log(1. - w) + 
                       nbinom.logpmf(x, s, p)),
                       np.log(w + (1. - w) *
                       nbinom.pmf(x, s, p)))
Example #15
    def __init__(self, endog, exog=None, missing='none', **kwds):
        super(CUSTOM_ZNB, self).__init__(endog, exog, missing=missing, **kwds)

        if exog is None: self.exog = np.ones((self.nobs, 1))
        self.nparams = self.exog.shape[1]

        obs_zp = len([e for e in self.endog if e == 0]) / float(len(self.endog))
        # NB-based alternative: pred_zp = nbinom.pmf(0, 2, 2.0 / (2.0 + self.endog.mean()))
        pred_zp = poisson.pmf(0, self.endog.mean())
        additional_zp = obs_zp - pred_zp

        self.exog_names.append('alpha')

        if additional_zp > 0.25:
            init_z_val = np.log((1.0 / additional_zp) - 1)
            self.start_params = np.hstack(
                (np.zeros(self.nparams), 0.5, init_z_val))
            self.exog_names.append('zi')
            self.nloglikeobs = self.nloglikeobs_wzp
        else:
            self.start_params = np.hstack((np.zeros(self.nparams), 0.5))
            self.nloglikeobs = self.nloglikeobs_woz

        self.start_params[0] = np.log(self.endog.mean())
        self.cloneattr = ['start_params']
Example #16
def dcPF_loglik(yt, link_RQ, l, p):
    N, _ = link_RQ.shape
    nb_pmf = nbinom.pmf(np.arange(N), -l / np.log(1 - p), 1 - p)
    logP = np.log(nb_pmf.dot(link_RQ))
    logP0 = np.log(1 - np.exp(-l))
    loglik = logP - logP0
    res = loglik[yt]
    return res.sum()
Example #17
    def negativebinomial_pmf(self, x, mu, kappa):
        n = kappa
        p = float(kappa) / (kappa + mu)
        return nbinom.pmf(x, n, p)
Example #18
 def get_rad_negbin(self, S, n, p):
     """Obtain the predicted RAD from a negative binomial distribution"""
     abundance = list(np.empty([S]))
     rank = range(1, int(S) + 1)
     cdf_obs = [(rank[i]-0.5) / S for i in range(0, int(S))]
     j = 0
     cdf_cum = 0
     i = 1
     while j < S:
         cdf_cum += nbinom.pmf(i, n, p) / (1 - nbinom.pmf(0, n, p))
         while cdf_cum >= cdf_obs[j]:
             abundance[j] = i
             j += 1
             if j == S:
                 abundance.reverse()
                 return abundance
         i += 1
Example #19
 def get_rad_negbin(self, S, n, p):
     """Obtain the predicted RAD from a negative binomial distribution"""
     abundance = list(np.empty([S]))
     rank = range(1, int(S) + 1)
     cdf_obs = [(rank[i] - 0.5) / S for i in range(0, int(S))]
     j = 0
     cdf_cum = 0
     i = 1
     while j < S:
         cdf_cum += nbinom.pmf(i, n, p) / (1 - nbinom.pmf(0, n, p))
         while cdf_cum >= cdf_obs[j]:
             abundance[j] = i
             j += 1
             if j == S:
                 abundance.reverse()
                 return abundance
         i += 1
Example #20
    def test_pmf(self):
        n, p = truncatednegbin.convert_params(2, 0.5, 2)
        nb_pmf = nbinom.pmf(6, n, p) / nbinom.sf(5, n, p)
        tnb_pmf = truncatednegbin.pmf(6, 2, 0.5, 2, 5)
        assert_allclose(nb_pmf, tnb_pmf, rtol=1e-7)

        tnb_pmf = truncatednegbin.pmf(5, 2, 0.5, 2, 5)
        assert_equal(tnb_pmf, 0)
Example #21
 def pmf(self, data, pi=None, lambda_0=None, r=None, p=None, loc=None):
     pi = pi if pi is not None else self.pi
     lambda_0 = lambda_0 if lambda_0 is not None else self.lambda_0
     r = r if r is not None else self.r
     p = p if p is not None else self.p
     loc = loc if loc is not None else 0

     return (pi * poisson.pmf(data, mu=lambda_0, loc=loc)
             + (1 - pi) * nbinom.pmf(data, n=r, p=1 - p, loc=loc))
Example #22
 def loglik(n_arr, t_arr, m, theta):
     if len(n_arr) != len(t_arr):
         raise ValueError("Length of arrays should be the same.")
     ll = 0
     for i in range(len(n_arr)):
         p = t_arr[i] / (t_arr[i] + theta)
         ll += np.log(nbinom.pmf(n_arr[i], m, p))
     return ll
Example #23
def calc_2X_coverage_threshold(cov_dict):
    '''
    Calculate a coverage threshold for each key in cov_dict, based on a
    likelihood ratio between the empirical Nbinom(mu, disp) 1X coverage
    distribution and a theoretical Poisson(2*mu) 2X coverage distribution.
    See the end of the 'alternative parameterization' section of the
    negative binomial page and the scipy negative binomial documentation
    for the details of the calculation.

    Choose the coverage threshold s.t. the likelihood ratio > 10.
    '''

    ## to convert my IDs to REL IDs.
    rel_name = {'RM3-130-1':'REL11734','RM3-130-2':'REL11735',
                'RM3-130-3':'REL11736','RM3-130-4':'REL11737',
                'RM3-130-5':'REL11738','RM3-130-6':'REL11739',
                'RM3-130-7':'REL11740','RM3-130-8':'REL11741',
                'RM3-130-9':'REL11742','RM3-130-10':'REL11743',
                'RM3-130-11':'REL11744','RM3-130-12':'REL11745',
                'RM3-130-13':'REL11746','RM3-130-14':'REL11747',
                'RM3-130-15':'REL11748','RM3-130-16':'REL11749',
                'RM3-130-17':'REL11750','RM3-130-18':'REL11751',
                'RM3-130-19':'REL11752','RM3-130-20':'REL11753',
                'RM3-130-21':'REL11754','RM3-130-22':'REL11755',
                'RM3-130-23':'REL11756','RM3-130-24':'REL11757',
                'REL4397':'REL4397', 'REL4398':'REL4398',
                'REL288':'REL288','REL291':'REL291','REL296':'REL296','REL298':'REL298'}

    
    threshold_dict = {}
    for g in cov_dict:
        mean = float(cov_dict[g]['mean'])
        var = float(cov_dict[g]['variance'])
        q = (var-mean)/var
        n = mean**2/(var-mean)
        p = 1 - q
        
        ## assert that I did the math correctly.
        assert(isclose(nbinom.mean(n,p), mean))
        assert(isclose(nbinom.var(n,p), var))

        ## find the integer threshold that includes ~95% of REL606 distribution,
        ## excluding 5% on the left hand side.
        for x in range(int(mean),int(2*mean)):
            p0 = nbinom.pmf(x,n,p)
            p1 = poisson.pmf(x,2*mean)
            lratio = p1/p0
            if lratio > 10:
                my_threshold = x
                my_threshold_p0 = p0
                my_threshold_p1 = p1
                my_lratio = lratio
                break    
        threshold_dict[rel_name[g]] = {'threshold': str(my_threshold),
                                       'threshold_p0': str(my_threshold_p0),
                                       'threshold_p1': str(my_threshold_p1),
                                       'lratio': str(my_lratio)}
    return threshold_dict
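
The same moment-matching and threshold rule can be exercised on synthetic numbers (a sketch; the mean and variance below are illustrative, not the REL606 values):

import numpy as np
from scipy.stats import nbinom, poisson

mean, var = 50.0, 120.0   # illustrative 1X coverage moments
n = mean**2 / (var - mean)
p = mean / var            # p = 1 - q with q = (var - mean) / var
assert np.isclose(nbinom.mean(n, p), mean)

# smallest coverage x whose Poisson(2*mean) / NB(n, p) likelihood ratio exceeds 10
threshold = next(x for x in range(int(mean), int(2 * mean))
                 if poisson.pmf(x, 2 * mean) / nbinom.pmf(x, n, p) > 10)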
Example #24
    def _logpmf(self, x, mu, alpha, p, truncation):
        size, prob = self.convert_params(mu, alpha, p)
        pmf = 0
        for i in range(int(np.max(truncation)) + 1):
            pmf += nbinom.pmf(i, size, prob)

        logpmf_ = nbinom.logpmf(x, size, prob) - np.log(1 - pmf)
        # logpmf_[x < truncation + 1] = - np.inf
        return logpmf_
Example #25
def plot_logo(adam_params, file_name, core_length):

    # assigns each kmer to an index and vice versa
    kmer_inx = generate_kmer_inx(core_length)
    inx_kmer = {y:x for x,y in kmer_inx.items()}
    
    colnames = ([inx_kmer[i] for i in range(len(inx_kmer))]
                + [inx_kmer[i] for i in range(len(inx_kmer))]
                + ['sf', 'r', 'p'] + ['LL'])
    data = pd.DataFrame(adam_params, columns=colnames)
    data_sorted = data.sort_values(by='LL')
    core1 = data_sorted.iloc[0, :len(kmer_inx)]
    core1_probs = energy2prob(core1, top_n=5)

    core2 = data_sorted.iloc[0, len(kmer_inx):2*len(kmer_inx)]
    core2_probs = energy2prob(core2, top_n=5)

    r = data_sorted['r'].values[0]
    p = data_sorted['p'].values[0]

    sns.set_style("ticks")
    sns.despine(trim=True)

    COLOR_SCHEME = {'G': 'orange', 
                    'A': 'red', 
                    'C': 'blue', 
                    'T': 'darkgreen',
                    'U': 'darkgreen'
                   }

    _ , (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(4.5, 1.5))
    plot_core_logo(core1_probs, ax1, color_scheme=COLOR_SCHEME)
    plot_core_logo(core2_probs, ax3, color_scheme=COLOR_SCHEME)

    # plot the distance distribution
    mean = ((1 - p) * r) / p
    xx = np.arange(0, int(mean) + 8, 1)
    
    _ = ax2.plot(xx, nbinom.pmf(xx, r, p), 'o--',alpha=0.7, color='black')
    _ = ax2.set_xlabel('distance')
    
    # hide the y axis in the plot
    ax2.set_frame_on(False)
    _ = ax2.get_yaxis().set_visible(False)
    xmin, xmax = ax2.get_xaxis().get_view_interval()
    ymin, ymax = ax2.get_yaxis().get_view_interval()
    ax2.add_artist(matplotlib.lines.Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))


    _ = ax3.set_yticks(range(0,2))
    _ = ax3.set_yticklabels(np.arange(0,2,1))

    _ = ax3.get_xaxis().set_visible(False)
    _ = ax1.set_ylabel('probability')
    sns.despine(ax=ax2, trim=True)
    sns.despine(ax=ax3, trim=True)

    plt.savefig(file_name + '.pdf', bbox_inches='tight')
    plt.savefig(file_name + '.png', bbox_inches='tight', dpi=150)
Example #26
 def nbinom_pmf_range(lambda_: int, rho: int, bin_id: int):
     stacked = np.zeros(len(kf_range), dtype=np.float64)
     lambda_ /= 100  # 2-digit precision
     rho /= 100  # 2-digit precision
     n = lambda_ / (rho - 1)
     p = 1 / rho
     start, end = bins[bin_id]
     for i in range(start, end + 1):
         stacked += nbinom.pmf(kf_range, n * i, p)
     return stacked
Example #27
	def make_nb_plot(self):
		self._get_nb_estimate()
		p = self.nb_prob
		n = self.nb_size
		x = np.arange(0, 100)
		pmf = nbinom.pmf(x, n, p)
		self.line_neg_binomial, = plt.plot(x, pmf, ls=":", linewidth=2)

		self.nb_real_kl = entropy(self.probs, pmf)
		self.nb_grammar_kl = entropy(self.b_prob[:100], pmf[:100])
Example #28
 def log_post_x_star_y_star(self, x_star, y_star):
     log_sum = 0.0
      N  = self.N_y_1 if y_star == 1 else self.N_y_0
      Nx = self.X_sum_y_1 if y_star == 1 else self.X_sum_y_0
     for col in self.x_cols:
         log_sum += np.log(nbinom.pmf(x_star[col], 
                               Nx[col] + self.a, 
                               (N+self.b)/float(N+self.b+1)))
     # print(self.count)
     self.count += 1
     return log_sum
Example #29
def plot_nbinom(r, p):
    left  = nbinom.ppf(0.01, r, p)
    right = nbinom.ppf(0.99, r, p)
    step = max(1, int((right - left) / 10))  # guard against a zero step
    x = np.arange(left, right, step)
    plt.plot(
        x,
        nbinom.pmf(x, r, p),
        alpha=0.6,
        color='gray'
    )
    plt.plot(
        x,
        nbinom.pmf(x, r, p),
        'o',
        label='$r=%s, p = %s$' % (r, p)
    )
Example #30
    def test_inversion_diffs(self):
        cfg = AppSettings()

        reps = 1000
        deltas = []  # observed number of differences

        for _ in range(0, reps):
            dna = Chromosome()
            old_seq = dna.sequence
            dna.inversion()
            deltas.append(
                sum(1 for a, b in zip(old_seq, dna.sequence) if a != b))

        pmfs = []
        expected_deltas = []  # expected differences

        # Assumes the length of an inversion is drawn from a negative binomial
        # distribution. Calculates the probability of each length until
        # 99.99% of the distribution is accounted for. The expected number of
        # differences for each length is multiplied by the probability of that length
        # and the sum of that gives the expected differences overall.
        k = 0
        while sum(pmfs) <= 0.9999:
            pmf = nbinom.pmf(k, 1, (1 - cfg.genetics.mutation_length /
                                    (1 + cfg.genetics.mutation_length)))
            pmfs.append(pmf)

            diffs = math.floor(
                k / 2) * (1 - 1 / len(Chromosome.nucleotides())) * 2
            expected_deltas.append(pmf * diffs)
            k += 1

        expected_delta = sum(expected_deltas)

        # Since we are multiplying a binomial distribution (probability of
        # differences at a given length) by a negative binomial distribution
        # (probability of a length), we need the variance of a product of two
        # independent random variables:
        # Var(X * Y) = var(x) * var(y) + var(x) * mean(y)^2 + mean(x)^2 * var(y)
        # http://www.odelama.com/data-analysis/Commonly-Used-Math-Formulas/

        mean_binom = cfg.genetics.mutation_length
        var_binom = binom.var(mean_binom, 1 / (len(Chromosome.nucleotides())))

        mean_nbinom = cfg.genetics.mutation_length
        var_nbinom = nbinom.var(cfg.genetics.mutation_length,
                                mean_nbinom / (1 + mean_nbinom))

        var = var_binom * var_nbinom + \
              var_binom * mean_nbinom**2 + \
              mean_binom**2 * var_nbinom

        observed_delta = sum(deltas) / reps
        conf_99 = ((var / reps)**(1 / 2)) * 5
        assert expected_delta - conf_99 < observed_delta < expected_delta + conf_99
Example #31
 def plot_distribution(self):
     """
     Plot the distrubution of estimated Coronavirus cases in Dhaka 
     """
     p = self.calculate_pro_detected_overseas()
     n = self.international.cases
     
     fig, ax = plt.subplots(1, 1)
      x = np.arange(nbinom.ppf(0.025, n, p),
                    nbinom.ppf(0.975, n, p))
     ax.vlines(x, 0, nbinom.pmf(x, n, p), color='lightblue', lw=5, alpha=0.5)
     ax.set_title(" pmf of coronavirus cases in Dhaka " + self.date)
Example #32
def is_bipartite(adam_params, core_length):
    # assigns each kmer to an index and vice versa
    kmer_inx = generate_kmer_inx(core_length)
    inx_kmer = {y:x for x,y in kmer_inx.items()}
    
    colnames = ([inx_kmer[i] for i in range(len(inx_kmer))]
                + [inx_kmer[i] for i in range(len(inx_kmer))]
                + ['sf', 'r', 'p'] + ['LL'])
    data = pd.DataFrame(adam_params, columns=colnames)
    
    data_sorted = data.sort_values(by='LL')
    r = data_sorted['r'].values[0]
    p = data_sorted['p'].values[0]

    prob_zero = nbinom.pmf(0, r, p)

    return prob_zero < 0.5
Example #33
def fit_CRF(cons, resps, nr_c50, nr_expn, nr_gain, nr_base, v_varGain, fit_type):
    # fit_type (i.e. which loss function):
        # 1 - least squares
        # 2 - square root
        # 3 - poisson
        # 4 - modulated poisson
    np = numpy;

    n_sfs = len(resps);

    # Evaluate the model
    loss_by_sf = np.zeros((n_sfs, 1));
    for sf in range(n_sfs):
        all_params = (nr_c50, nr_expn, nr_gain, nr_base);
        param_ind = [0 if len(i) == 1 else sf for i in all_params];

        nr_args = [nr_base[param_ind[3]], nr_gain[param_ind[2]], nr_expn[param_ind[1]], nr_c50[param_ind[0]]]; 
        # evaluate the model
        pred = naka_rushton(cons[sf], nr_args); # ensure we don't have pred (lambda) = 0 --> log will "blow up"

        if fit_type == 4:
          # Get predicted spike count distributions
          mu  = pred; # The predicted mean spike count; respModel[iR]
          var = mu + (v_varGain * np.power(mu, 2)); # The corresponding variance of the spike count
          r   = np.power(mu, 2) / (var - mu); # The parameters r and p of the negative binomial distribution
          p   = r / (r + mu);
        # no elif/else

        if fit_type == 1 or fit_type == 2:
          # error calculation
          if fit_type == 1:
            loss = lambda resp, pred: np.sum(np.power(resp-pred, 2)); # least-squares, for now...
          if fit_type == 2:
            loss = lambda resp, pred: np.sum(np.square(np.sqrt(resp) - np.sqrt(pred)));

          curr_loss = loss(resps[sf], pred);
          loss_by_sf[sf] = np.sum(curr_loss);

        else:
          # likelihood calculation
          if fit_type == 3:
            loss = lambda resp, pred: poisson.logpmf(resp, pred);
            curr_loss = loss(resps[sf], pred); # already log
          if fit_type == 4:
            loss = lambda resp, r, p: np.log(nbinom.pmf(resp, r, p)); # Likelihood for each pass under doubly stochastic model
            curr_loss = loss(resps[sf], r, p); # already log
          loss_by_sf[sf] = -np.sum(curr_loss); # negate if LLH

    return np.sum(loss_by_sf);
Example #34
def gen_single_mode_dist(s, cutoff=50, N=1):
    """Generate the photon number distribution of :math:`N` identical single mode squeezed states.

    Args:
        s (float): squeezing parameter
        cutoff (int): Fock cutoff
        N (float): number of squeezed states

    Returns:
        (array): Photon number distribution
    """
    r = 0.5 * N
    q = 1.0 - np.tanh(s)**2
    half = cutoff // 2  # avoid shadowing the argument N
    ps_tot = np.zeros(cutoff)
    if cutoff % 2 == 0:
        ps = nbinom.pmf(np.arange(half), p=q, n=r)
        ps_tot[0::2] = ps
    else:
        ps = nbinom.pmf(np.arange(half + 1), p=q, n=r)
        ps_tot[0:-1][0::2] = ps[0:-1]
        ps_tot[-1] = ps[-1]

    return ps_tot
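
A quick normalization check of the function above (a sketch with an illustrative squeezing value): for a single squeezed vacuum state only even photon numbers carry probability, and with a generous cutoff the distribution should sum to one.

import numpy as np

ps = gen_single_mode_dist(0.8, cutoff=60, N=1)
assert np.isclose(ps.sum(), 1.0, atol=1e-6)  # normalized up to the truncated tail
assert np.allclose(ps[1::2], 0.0)            # odd photon numbers have zero weight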
Example #35
    def coverage_probability(self,
                             nr_obs,
                             a,
                             mean_lib,
                             stddev_lib,
                             z,
                             coverage_mean,
                             read_len,
                             s_inner,
                             s_outer,
                             b=None,
                             coverage_model=False):
        ''' Distribution P(o|c,z) for the prior probability over coverage.
            This probability distribution is implemented as a Poisson
            distribution.

            Attributes:

            c       -- coverage
            mean    -- mean value of poisson distribution.

            Returns probability P(c)

        '''
        if not b:
            # only one reference sequence.
            # We split the reference sequence into two equal
            # length sequences to fit the model.
            a, b = a / 2, a / 2

        param = Param(mean_lib, stddev_lib, coverage_mean, read_len, s_inner,
                      s_outer)
        lambda_ = mean_span_coverage(a, b, z, param)

        if coverage_model == 'Poisson':
            return poisson.pmf(nr_obs, lambda_, loc=0)

        elif coverage_model == 'NegBin':
            p = 0.01
            n = (p * lambda_) / (1 - p)
            return nbinom.pmf(nr_obs, n, p, loc=0)
        else:
            # This is equivalent to uniform coverage
            return 1  #uniform.pdf(nr_obs, loc=lambda_- 0.3*lambda_, scale=lambda_ + 0.3*lambda_ )
Example #36
def computeHscore(states, y, parameters_last, xweights_last, thetaweights_last,
                  t):
    # xweights is of size Nx x Ntheta (unnormalized); thetaweights is also unnormalized
    # the output compositeHScore is for model comparison, while simpleHScore is for H-based Bayes
    # the composite score evaluates the time-t observation under the predictive distribution made at time t-1
    y0 = int(y[0])
    y1 = int(y[1])
    Nx = states.shape[0]
    Ntheta = states.shape[2]
    #compute the weighting matrix
    W = zeros((Nx, Ntheta))
    thetaweights_last = thetaweights_last / sum(
        thetaweights_last)  #first transform to normalized ones
    xNormConst = sum(xweights_last, axis=0)
    for k in range(Ntheta):
        W[:, k] = thetaweights_last[k] * xweights_last[:, k] / xNormConst[k]

    #compute the conditional density given a last theta-particle and a last x-particle
    p = zeros((Nx, Ntheta))
    n = zeros((Nx, Ntheta))
    for k in range(Nx):
        p[k, :] = 1 / (1 + parameters_last[0, :] * states[k, 0, :])
        p[k, :] = minimum(p[k, :], 1 - 1e-7)
        n[k, :] = maximum(1, floor(states[k, 0, :] * p[k, :] /
                                   (1 - p[k, :]))).astype(int32)
    # conDensi = zeros((Nx, Ntheta))

    score_y0 = average(a=nbinom.pmf(y0, n, p), weights=W)
    score_y0_p = average(a=nbinom.pmf(y0 + 1, n, p), weights=W)  #plus 1
    score_y1 = average(a=nbinom.pmf(y1, n, p), weights=W)
    score_y1_p = average(a=nbinom.pmf(y1 + 1, n, p), weights=W)
    # print '\n', score_y0_p, score_y0
    if y0 == 0:
        score0 = score_y0_p / score_y0 - 1 + 0.5 * pow(
            score_y0_p / score_y0 - 1, 2)
    else:
        score_y0_m = average(a=nbinom.pmf(y0 - 1, n, p), weights=W)  #minus 1
        score0 = score_y0_p / score_y0 - score_y0 / score_y0_m + 0.5 * pow(
            score_y0_p / score_y0 - 1, 2)
    if y1 == 0:
        score1 = score_y1_p / score_y1 - 1 + 0.5 * pow(
            score_y1_p / score_y1 - 1, 2)
    else:
        score_y1_m = average(a=nbinom.pmf(y1 - 1, n, p), weights=W)  #minus 1
        score1 = score_y1_p / score_y1 - score_y1 / score_y1_m + 0.5 * pow(
            score_y1_p / score_y1 - 1, 2)

    compositeHScore = score0 + score1
    simpleHScore = zeros(1)

    return {"simpleHScore": simpleHScore, "compositeHScore": compositeHScore}
Example #37
def gen_ztnegbinom(n, mu, size):
    """Zero truncated negative binomial distribution.

        input:  n, int
                number of successes

                mu, float or int
                number of trials

                size, float
                probability of success

        output: ztnb, list of int
                draws from a zero truncated negative binomial distribution
    """

    temp = nbinom.pmf(0, mu, size)
    p = [uniform.rvs(loc=temp[i], scale=1-temp[i]) for i in range(n)]
    ztnb = [int(nbinom.ppf(p[i], mu[i], size)) for i in range(n)]

    return np.array(ztnb)
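
A small smoke test of the inverse-CDF truncation above (a sketch with illustrative arguments): every draw should be at least 1, since the uniform variate is confined to (P(X=0), 1).

import numpy as np

draws = gen_ztnegbinom(1000, np.full(1000, 5), 0.4)
assert draws.min() >= 1  # zeros are truncated away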
Example #38
def log_neg_binom_likelihood(k, r, mu, sd=0):
	if sd==0:
		offset = 0
		diff = 1
		minR = r
		maxR = r
	else: 
		offset = NB_FRAC*sd
		minR = int(r - offset)
		maxR = int(r + offset)
		diff = maxR-minR+1 #num iterations
	
	mle=0
	#Inclusive
	for r_val in range(minR, maxR+1, NB_INCR):
		#likelihood that r_val, mu are true parameters given that you have seen k
		newVal = llh_neg_binom(k, r_val, mu)
		#probability of seeing k given r,p - assuming the prior is a negative binomial with 
		#r and mu as true values in this case
		weight = nbinom.pmf(k, r, mu)
		mle += newVal+weight
	return mle
Example #39
def getloglikelihood2(kmat,mu_estimate,alpha,sumup=False,log=True):
  '''
  Get the log likelihood estimation of NB, using the current estimation of beta
  '''
  #logmu_est=sk.extended_design_mat * np.matrix(beta_est).getT()
  # Tracer()()
  #mu_estimate= np.exp(logmu_est)
  # these are all N*1 matrix
  #mu_vec=np.array([t[0] for t in mu_estimate.tolist()])
  #k_vec=np.array([round(t[0]) for t in kmat.tolist()])
  #if len(mu_vec) != len(k_vec):
  #  raise ValueError('Count table dimension is not the same as mu vector dimension.')
  # var_vec=mu_vec+alpha*mu_vec*mu_vec
  # nb_p=[mu_vec[i]/var_vec[i] for i in range(len(mu_vec))] 
  # nb_r=[mu_vec[i]*mu_vec[i]/(var_vec[i]-mu_vec[i]) for i in range(len(mu_vec))]
  # if log:
  #  logp=np.array([nbinom.logpmf(k_vec[i],nb_r[i],nb_p[i]) for i in range(len(mu_vec))])
  #else:
  #  logp=np.array([nbinom.pmf(k_vec[i],nb_r[i],nb_p[i]) for i in range(len(mu_vec))])
  if kmat.shape[0] != mu_estimate.shape[0]:
    raise ValueError('Count table dimension is not the same as mu vector dimension.')
  kmat_r=np.round(kmat)
  mu_sq=np.multiply(mu_estimate,mu_estimate)
  var_vec=mu_estimate+np.multiply(alpha, mu_sq)
  nb_p=np.divide(mu_estimate,var_vec)
  nb_r=np.divide(mu_sq,var_vec-mu_estimate)
  if log:
    logp=nbinom.logpmf(kmat_r,nb_r,nb_p)
  else:
    logp=nbinom.pmf(kmat_r,nb_r,nb_p)
  
  if np.isnan(np.sum(logp)):
    #raise ValueError('nan values for log likelihood!')
    logp=np.where(np.isnan(logp),0,logp)
  if sumup:
    return np.sum(logp)
  else:
    return logp
Example #40
def getloglikelihood2(kmat,mu_estimate,alpha,sumup=False,log=True):
  '''
  Get the log likelihood estimation of NB, using the current estimation of beta
  '''
  if kmat.shape[0] != mu_estimate.shape[0]:
    raise ValueError('Count table dimension is not the same as mu vector dimension.')
  kmat_r=np.round(kmat)
  mu_sq=np.multiply(mu_estimate,mu_estimate)
  var_vec=mu_estimate+np.multiply(alpha, mu_sq)
  nb_p=np.divide(mu_estimate,var_vec)
  nb_r=np.divide(mu_sq,var_vec-mu_estimate)
  if log:
    logp=nbinom.logpmf(kmat_r,nb_r,nb_p)
  else:
    logp=nbinom.pmf(kmat_r,nb_r,nb_p)

  if np.isnan(np.sum(logp)):
    #raise ValueError('nan values for log likelihood!')
    logp=np.where(np.isnan(logp),0,logp)
  if sumup:
    return np.sum(logp)
  else:
    return logp
Example #41
def getloglikelihood2(k_list,mu_list,alpha,sumup=False,log=True):
    '''
    Get the log likelihood estimation of NB, using the current estimation of beta and alpha
    '''
    # solution 1
    mu_sq=np.multiply(mu_list,mu_list)
    var_vec=mu_list+np.multiply(alpha, mu_sq)
    nb_p=np.divide(mu_list,var_vec)
    nb_r=np.divide(mu_sq,var_vec-mu_list)

    if log:
        logp=nbinom.logpmf(k_list,nb_r,nb_p)
    else:
        logp=nbinom.pmf(k_list,nb_r,nb_p)
    if np.isnan(np.sum(logp)):
        logp=np.where(np.isnan(logp),0,logp)
    if sumup:
        return np.sum(logp)
    else:
        return logp
Example #42
	def get_nb(self):
		p = 0.200086480861
		n = 4.88137405883
		x = np.arange(0, self.distrib_len)
		self.nb_pmf = nbinom.pmf(x, n, p)
Example #43
 def _ppf(self, q, n, p):
     return nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
Example #44
 def test_pmf_p2(self):
     n, p = sm.distributions.zinegbin.convert_params(30, 0.1, 2)
     nb_pmf = nbinom.pmf(100, n, p)
     tnb_pmf = sm.distributions.zinegbin.pmf(100, 30, 0.1, 2, 0.01)
     assert_allclose(nb_pmf, tnb_pmf, rtol=1e-5, atol=1e-5)
Example #45
 def _rvs(self, n, p):
     return nbinom.ppf(uniform(low=nbinom.pmf(0, n, p)), n, p)
Example #46
 def _pmf(self, x, n, p):
     if x == 0:
         return 0.0
     else:
         return nbinom.pmf(x, n, p) / nbinom.sf(0, n, p)
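
The renormalization by nbinom.sf(0, n, p) in _pmf makes the zero-truncated probabilities sum to one again; a small check with illustrative parameters:

import numpy as np
from scipy.stats import nbinom

n, p = 4.0, 0.35       # illustrative parameters
k = np.arange(1, 400)  # k = 0 is excluded by the truncation
zt_pmf = nbinom.pmf(k, n, p) / nbinom.sf(0, n, p)
assert np.isclose(zt_pmf.sum(), 1.0, atol=1e-9)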
Example #47
 def _cdf(self, x, n, p):
     k = floor(x)
     if k == 0:
         return 0.0
     else:
         return (nbinom.cdf(x, n, p) - nbinom.pmf(0, n, p)) / nbinom.sf(0, n, p) 
Example #48
 def test_pmf(self):
     n, p = sm.distributions.zinegbin.convert_params(1, 0.9, 1)
     nb_pmf = nbinom.pmf(2, n, p)
     tnb_pmf = sm.distributions.zinegbin.pmf(2, 1, 0.9, 2, 0.5)
     assert_allclose(nb_pmf, tnb_pmf * 2, rtol=1e-7)