def calc_coverage_threshold(cov_dict):
    '''
    calculate minimum coverage threshold for each key in cov_dict.

    every value of cov_dict must provide a 'mean' and a 'variance' entry
    (anything float() accepts). these are converted to the (n, p)
    parameterization used by scipy's negative binomial:

        p = mean / variance
        n = mean**2 / (variance - mean)

    see end of 'alternative parameterization' section of Negative binomial page
    and scipy negative binomial documentation for details of calculation.

    the threshold is the 5% quantile of that distribution, i.e. the smallest
    count whose cumulative probability is at least 0.05.

    returns a dict mapping each key of cov_dict to
    {'threshold': str(quantile), 'threshold_p': str(cdf at the quantile)}.

    raises ValueError when variance > mean > 0 does not hold: the negative
    binomial conversion is only defined for overdispersed data, and would
    otherwise divide by zero or produce a negative n.
    '''
    threshold_dict = {}
    for g in cov_dict:
        mean = float(cov_dict[g]['mean'])
        var = float(cov_dict[g]['variance'])

        ## explicit validation instead of assert: asserts vanish under -O,
        ## and a NaN mean or variance also fails this comparison.
        if not (var > mean > 0):
            raise ValueError(
                "cannot fit a negative binomial to {!r}: need "
                "variance > mean > 0, got mean={}, variance={}".format(
                    g, mean, var))

        ## with these definitions n*(1-p)/p == mean and n*(1-p)/p**2 == var.
        p = mean / var
        n = mean**2 / (var - mean)

        ## find the integer threshold that includes ~95% of the distribution,
        ## excluding 5% on the left hand side.
        my_threshold = nbinom.ppf(0.05, n, p)
        my_threshold_p = nbinom.cdf(my_threshold, n, p)
        threshold_dict[g] = {'threshold': str(my_threshold),
                             'threshold_p': str(my_threshold_p)}
    return threshold_dict
Пример #2
0
 def test_mran_var_p2(self):
     """Compare zinegbin's mean and variance with scipy's nbinom.

     The arguments (7, 1, 2) are converted to nbinom's (n, p) through
     zinegbin.convert_params; zinegbin.mean / zinegbin.var are then called
     with the same three arguments plus a trailing 0 and must agree with
     nbinom to a relative tolerance of 1e-10.
     NOTE(review): the trailing 0 is presumably the zero-inflation weight;
     confirm against the statsmodels API.
     """
     zinb = sm.distributions.zinegbin
     size, prob = zinb.convert_params(7, 1, 2)
     expected_mean = nbinom.mean(size, prob)
     expected_var = nbinom.var(size, prob)
     actual_mean = zinb.mean(7, 1, 2, 0)
     actual_var = zinb.var(7, 1, 2, 0)
     assert_allclose(expected_mean, actual_mean, rtol=1e-10)
     assert_allclose(expected_var, actual_var, rtol=1e-10)
Пример #3
0
 def test_mean_var(self):
     """Compare zinegbin's _mean and _var with scipy's nbinom.

     Runs once with the scalar 9 and once with the array [1, 5, 10] as the
     first argument. Each time (m, 1, 1) is converted to nbinom's (n, p)
     through zinegbin.convert_params, and zinegbin._mean / zinegbin._var
     called with (m, 1, 1, 0) must agree with nbinom to a relative
     tolerance of 1e-10.
     """
     zinb = sm.distributions.zinegbin
     cases = (9, np.array([1, 5, 10]))
     for first_arg in cases:
         size, prob = zinb.convert_params(first_arg, 1, 1)
         expected_mean = nbinom.mean(size, prob)
         expected_var = nbinom.var(size, prob)
         actual_mean = zinb._mean(first_arg, 1, 1, 0)
         actual_var = zinb._var(first_arg, 1, 1, 0)
         assert_allclose(expected_mean, actual_mean, rtol=1e-10)
         assert_allclose(expected_var, actual_var, rtol=1e-10)
def calc_2X_coverage_threshold(cov_dict):
    '''
    calculate coverage threshold for each key in cov_dict, based on a likelihood ratio
    between empirical Nbinom(mu,disp) 1X coverage distribution, and a theoretical
    Poisson(2*mu) 2X coverage distribution.
    see end of 'alternative parameterization' section of Negative binomial page
    and scipy negative binomial documentation for details of calculation.

    every value of cov_dict must provide a 'mean' and a 'variance' entry
    (anything float() accepts); every key must be one of the strain IDs
    known to the ID table built below (KeyError otherwise).

    the threshold is the smallest integer x in [int(mean), int(2*mean)) s.t.
    the likelihood ratio Poisson(2*mean).pmf(x) / Nbinom.pmf(x) > 10.
    NOTE: the comparison is made on the plain ratio, not on its logarithm.

    returns a dict keyed by REL ID, each value being
    {'threshold', 'threshold_p0', 'threshold_p1', 'lratio'} (all str), where
    p0 is the negative binomial pmf and p1 the Poisson pmf at the threshold.

    raises ValueError when variance > mean > 0 does not hold, or when no x
    in the scanned range reaches the required ratio. (previously the second
    case either crashed on an unbound local or silently reported the
    threshold of the previously processed key.)
    '''

    ## to convert my IDs to REL IDs:
    ## RM3-130-1 .. RM3-130-24 map onto REL11734 .. REL11757.
    rel_name = {'RM3-130-{}'.format(i): 'REL{}'.format(11733 + i)
                for i in range(1, 25)}
    ## these strains are already labelled by their REL ID.
    for rel_id in ('REL4397', 'REL4398',
                   'REL288', 'REL291', 'REL296', 'REL298'):
        rel_name[rel_id] = rel_id

    threshold_dict = {}
    for g in cov_dict:
        mean = float(cov_dict[g]['mean'])
        var = float(cov_dict[g]['variance'])

        ## explicit validation instead of assert: asserts vanish under -O,
        ## and a NaN mean or variance also fails this comparison.
        if not (var > mean > 0):
            raise ValueError(
                "cannot fit a negative binomial to {!r}: need "
                "variance > mean > 0, got mean={}, variance={}".format(
                    g, mean, var))

        ## with these definitions n*(1-p)/p == mean and n*(1-p)/p**2 == var.
        p = mean / var
        n = mean**2 / (var - mean)

        ## scan upwards from the 1X mean towards the 2X mean and stop at the
        ## first coverage where the 2X model is >10 times more likely.
        for x in range(int(mean), int(2*mean)):
            p0 = nbinom.pmf(x, n, p)
            p1 = poisson.pmf(x, 2*mean)
            lratio = p1/p0
            if lratio > 10:
                break
        else:
            ## never fall through with values left over from another key.
            raise ValueError(
                "no coverage in [{}, {}) gives a likelihood ratio > 10 "
                "for {!r}".format(int(mean), int(2*mean), g))

        threshold_dict[rel_name[g]] = {'threshold': str(x),
                                       'threshold_p0': str(p0),
                                       'threshold_p1': str(p1),
                                       'lratio': str(lratio)}
    return threshold_dict
Пример #5
0
def ComputeNBMeanVar(ExprPar):
    '''
    Return the (mean, variance) pair of a negative binomial distribution.

    ExprPar is indexed positionally: ExprPar[0] is taken as p and
    ExprPar[1] as n, and both are handed to scipy's nbinom as (n, p).
    '''
    size = ExprPar[1]
    prob = ExprPar[0]
    return nbinom.mean(size, prob), nbinom.var(size, prob)
Пример #6
0
def plot_pascal_distr(p, n):
    """Plot the pmf of nbinom(n, p) as purple dots, with the mean in red.

    The pmf is evaluated at k = 0, 1, 2, ... until the accumulated
    probability reaches 0.99. A red vertical line is drawn at the
    distribution mean on the axes returned by plt.gca(), and the points
    are plotted with the label "p=<p>". Nothing is returned.
    """
    support = []
    masses = []
    covered = 0
    k = 0
    # keep adding points until 99% of the probability mass is shown
    while covered < 0.99:
        mass = nbinom.pmf(k, n, p)
        covered += mass
        support.append(k)
        masses.append(mass)
        k += 1

    axes = plt.gca()
    axes.axvline(x=nbinom.mean(n, p), color="red")

    point_style = {
        "label": "p={}".format(p),
        "marker": "o",
        "linestyle": "None",
        "color": "purple",
    }
    plt.plot(support, masses, **point_style)
Пример #7
0
def analytical_MPVS(
        infection_ts: pd.DataFrame,
        smoothing: Callable,
        alpha: float = 3.0,                # shape
        beta:  float = 2.0,                # rate
        CI:    float = 0.95,               # confidence interval
        infectious_period: int = 5*days,   # inf period = 1/gamma,
        variance_shift: float = 0.99,      # how much to scale variance parameters by when anomaly detected
        totals: bool = True                # are these case totals or daily new cases?
    ):
    """Estimates Rt ~ Gamma(alpha, 1/beta), and implements an analytical expression for a mean-preserving variance increase whenever case counts fall outside the CI defined by a negative binomial distribution

    Parameters
    ----------
    infection_ts
        Case time series; its index supplies the dates. With ``totals=True``
        it is treated as cumulative totals and converted to daily cases
        (clipped at zero, differenced, clipped again, first row dropped);
        otherwise it is used as daily cases directly.
    smoothing
        Callable applied to the squeezed daily cases; its output is
        cumulatively summed to give the smoothed totals iterated over.
    alpha, beta
        Starting shape and rate of the Gamma distribution. At every step
        the current day's new cases are added to ``alpha`` and the previous
        day's new cases to ``beta``.
    CI
        Quantile level of the upper bounds; the lower bounds use ``1 - CI``.
    infectious_period
        Multiplier in ``1 + infectious_period * log(.)``, the transform
        applied to the Gamma mean and quantiles (results are floored at 0).
    variance_shift
        Factor applied to the negative binomial ``p`` on every annealing
        iteration; ``r`` is rescaled alongside so that the negative binomial
        mean ``r*(1-p)/p`` is unchanged.
    totals
        Whether ``infection_ts`` holds cumulative totals (True) or daily
        new cases (False).

    Returns
    -------
    tuple
        ``(dates[2:], RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper,
        T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)``.

    Raises
    ------
    ValueError
        If a single anomaly needs 10000 or more annealing iterations.

    Notes
    -----
    NOTE(review): with ``totals=True`` the first row is dropped before
    smoothing, so position ``i`` of ``total_cases`` may correspond to
    ``dates[i + 1]`` rather than ``dates[i]``, and ``dates[2:]`` may be one
    element longer than the per-step lists. Left unchanged because it depends
    on what ``smoothing`` returns; confirm against callers.
    """
    dates = infection_ts.index
    if totals:
        # infection_ts clipped because COVID19India API does weird stuff;
        # the first row is dropped because diff() leaves it undefined.
        daily_cases = infection_ts.clip(lower = 0).diff().clip(lower = 0).iloc[1:]
    else:
        daily_cases = infection_ts
    total_cases = np.cumsum(smoothing(np.squeeze(daily_cases)))

    RR_pred, RR_CI_upper, RR_CI_lower = [], [], []

    T_pred, T_CI_upper, T_CI_lower = [], [], []

    new_cases_ts = []

    anomalies     = []
    anomaly_dates = []

    for i in range(2, len(total_cases)):
        new_cases     = max(0, total_cases[i]   - total_cases[i-1])
        old_new_cases = max(0, total_cases[i-1] - total_cases[i-2])

        alpha += new_cases
        beta  += old_new_cases

        RR_est   = max(0, 1 + infectious_period*np.log(Gamma.mean(     a = alpha, scale = 1/beta)))
        RR_upper = max(0, 1 + infectious_period*np.log(Gamma.ppf(CI,   a = alpha, scale = 1/beta)))
        RR_lower = max(0, 1 + infectious_period*np.log(Gamma.ppf(1-CI, a = alpha, scale = 1/beta)))
        RR_pred.append(RR_est)
        RR_CI_upper.append(RR_upper)
        RR_CI_lower.append(RR_lower)

        # both counts are floored at 0 above, so "either one is 0" and
        # "both are positive" are the only two possibilities.
        if new_cases == 0 or old_new_cases == 0:
            if new_cases == 0:
                logger.debug("new_cases at time %s: 0", i)
            if old_new_cases == 0:
                logger.debug("old_new_cases at time %s: 0", i)
            T_pred.append(0)
            T_CI_upper.append(10) # NOTE(review): origin of this constant is undocumented
            T_CI_lower.append(0)
            new_cases_ts.append(0)
            continue

        new_cases_ts.append(new_cases)

        # predictive distribution for today's cases
        r, p = alpha, beta/(old_new_cases + beta)
        T_pred.append(nbinom.mean(r, p))
        T_upper = nbinom.ppf(CI,   r, p)
        T_lower = nbinom.ppf(1-CI, r, p)
        T_CI_upper.append(T_upper)
        T_CI_lower.append(T_lower)

        # anneal: widen the negative binomial, keeping its mean fixed, until
        # the observed count lies strictly inside (T_lower, T_upper).
        _np = p
        _nr = r
        anomaly_noted = False
        counter = 0
        while not (T_lower < new_cases < T_upper):
            if not anomaly_noted:
                # record each anomaly once, on the first annealing iteration
                anomalies.append(new_cases)
                anomaly_dates.append(dates[i])

            _nr = variance_shift * _nr * ((1-_np)/(1-variance_shift*_np) )
            _np = variance_shift * _np
            T_upper = nbinom.ppf(CI,   _nr, _np)
            T_lower = nbinom.ppf(1-CI, _nr, _np)
            T_lower, T_upper = sorted((T_lower, T_upper))
            if T_lower == T_upper == 0:
                T_upper = 1
                logger.debug("CI collapse, setting T_upper -> 1")
            anomaly_noted = True

            counter += 1
            if counter >= 10000:
                raise ValueError("Number of iterations exceeded")

        if anomaly_noted:
            alpha = _nr # update distribution on R with new parameters that enclose the anomaly
            beta = _np/(1-_np) * old_new_cases

            T_pred[-1] = nbinom.mean(_nr, _np)
            # lower bound from the 1-CI quantile, upper bound from the CI
            # quantile, matching the non-annealed path above (these two
            # assignments used to be swapped).
            T_CI_lower[-1] = nbinom.ppf(1-CI, _nr, _np)
            T_CI_upper[-1] = nbinom.ppf(CI,   _nr, _np)

            # annealing leaves the RR mean unchanged, but we need to adjust its widened CI
            RR_upper = max(0, 1 + infectious_period * np.log(Gamma.ppf(CI    , a = alpha, scale = 1/beta)))
            RR_lower = max(0, 1 + infectious_period * np.log(Gamma.ppf(1 - CI, a = alpha, scale = 1/beta)))

            # replace latest CI time series entries with adjusted CI
            RR_CI_upper[-1] = RR_upper
            RR_CI_lower[-1] = RR_lower
    return (
        dates[2:],
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    )
Пример #8
0
 def mean(self, n, p):
     """Return nbinom.mean(self, n, p).

     NOTE(review): self is forwarded as the first positional argument, which
     reads like an unbound-method call on a class bound to the name nbinom;
     confirm what nbinom refers to in this module.
     """
     return nbinom.mean(self, n, p)