def binomCheck(ntpos):
    """Check whether the minor variant at ``ntpos`` is supported on both strands.

    Reads the per-position count dictionaries ``FORWARD_DICT[ntpos]`` and
    ``REVERSE_DICT[ntpos]`` (each maps a key -- a nucleotide, judging by the
    variable names -- to its count) and takes the two highest-count keys of
    each as major and minor variant.

    The position is rejected (``False``) when:
      * either dictionary is empty;
      * either dictionary holds only one key (a note plus both dictionaries
        are appended to ``NOTESLIST`` and printed);
      * forward and reverse disagree on the major or on the minor key (a
        note and the four keys are appended to ``NOTESLIST``).

    Otherwise, for each strand, ``1 - binom.cdf(minor, major + minor,
    args.cutoff)`` is computed and the position is accepted only when both
    values are at most ``ALPHA / 2`` (0.025).

    Arguments:
    ntpos : key into FORWARD_DICT / REVERSE_DICT; formatted with ``%d`` in
            the diagnostic messages, so it must be a number.

    Returns:
    True when both strands pass the binomial test, False otherwise.
    """
    fordict = FORWARD_DICT[ntpos]
    revdict = REVERSE_DICT[ntpos]

    # Nothing to compare when one strand has no observations at all.
    if not fordict or not revdict:
        return False

    # Keys ordered by descending count; keep at most the top two.
    topF = sorted(fordict, key=fordict.get, reverse=True)[:2]
    topR = sorted(revdict, key=revdict.get, reverse=True)[:2]

    if len(topF) == 1 or len(topR) == 1:
        # Single-argument print(...) behaves the same under Python 2 and 3
        # and emits exactly the text the old print statements produced.
        print('unequal minor variant count in forward/reverse %d' % ntpos)
        print('forward %s' % (fordict,))
        print('reverse %s' % (revdict,))
        NOTESLIST.append('take a closer look at, only one minorvar %d' % ntpos)
        NOTESLIST.append(fordict)
        NOTESLIST.append(revdict)
        return False

    # Index 0 is the highest count (major), index 1 the second highest (minor).
    f_majornt, f_minornt = topF
    r_majornt, r_minornt = topR

    if f_majornt != r_majornt or f_minornt != r_minornt:
        print('binom not equal')
        # Record what disagreed so it can be inspected later.
        NOTESLIST.append('take a closer look at %d' % ntpos)
        NOTESLIST.append([f_majornt, r_majornt, f_minornt, r_minornt])
        return False

    ALPHA = 0.05  # overall significance level, split evenly across the two strands

    forwardMajorCount = fordict[f_majornt]
    forwardMinorCount = fordict[f_minornt]
    reverseMajorCount = revdict[r_majornt]
    reverseMinorCount = revdict[r_minornt]

    # Probability of seeing more than the observed minor count when the
    # per-read variant rate is args.cutoff.
    pforward = 1 - binom.cdf(forwardMinorCount,
                             forwardMajorCount + forwardMinorCount,
                             args.cutoff)
    preverse = 1 - binom.cdf(reverseMinorCount,
                             reverseMajorCount + reverseMinorCount,
                             args.cutoff)

    return bool(pforward <= ALPHA / 2 and preverse <= ALPHA / 2)
Пример #2
0
def solve(N, X, Y):
    """Return 0, 1 or a binomial tail probability for the point (X, Y).

    Only ``abs(X)`` is used. The helper ``xytotri`` is evaluated at
    ``X + Y`` and ``X + Y + 2`` and compared against ``N``:

      * ``xytotri(X + Y) >= N``      -> 0
      * ``xytotri(X + Y + 2) <= N``  -> 1
      * ``X == 0`` (after the two checks above) -> 0
      * otherwise, with ``rem = N - xytotri(X + Y)``, return 1 when
        ``rem - (xytotri(X + Y + 2) - xytotri(X + Y) - 1) / 2 > Y``,
        else ``1 - binom.cdf(Y, rem, 0.5)``.

    NOTE(review): ``xytotri`` is defined elsewhere; presumably it maps a
    layer index to a cumulative item count -- confirm against its definition.
    """
    X = abs(X)
    layer = X + Y

    filled = xytotri(layer)
    if filled >= N:
        return 0

    if xytotri(layer + 2) <= N:
        return 1

    leftover = N - filled

    # The second clause is already implied by the early return above; it is
    # kept so the helper is called exactly as before.
    if X == 0 and xytotri(layer + 2) > N:
        return 0

    half_gap = (xytotri(layer + 2) - filled - 1) / 2
    if leftover - half_gap > Y:
        return 1

    # P(more than Y successes in `leftover` fair trials)
    at_most_y = binom.cdf((Y + 1) - 1, leftover, 0.5)
    return 1 - at_most_y
Пример #3
0
def binomialcheck(majornt,minornt,fordict,revdict):
    """Binomial test of the minor variant on the forward and reverse strand.

    Arguments:
    majornt : key of the major variant in both count dictionaries
    minornt : key of the minor variant in both count dictionaries
    fordict : counts for the forward strand
    revdict : counts for the reverse strand

    Returns:
    True when, on both strands, ``1 - binom.cdf(minor, major + minor, 0.03)``
    is at most ``0.05 / 2``; False otherwise.
    """
    percentVariant = 0.03
    ALPHA = 0.05

    # Look up all four counts before any statistics are computed.
    fwd_major, fwd_minor = fordict[majornt], fordict[minornt]
    rev_major, rev_minor = revdict[majornt], revdict[minornt]

    strand_counts = ((fwd_major, fwd_minor), (rev_major, rev_minor))
    tail_probs = [
        1 - binom.cdf(minor, (major + minor), percentVariant)
        for major, minor in strand_counts
    ]

    return bool(all(prob <= ALPHA/2 for prob in tail_probs))
Пример #4
0
def WFMD(N,m,g,k):
    '''
    The probability that in a population of N diploid individuals initially 
    possessing m copies of a dominant allele, we will observe after g 
    generations at least k copies of a recessive allele. Assume the 
    Wright-Fisher model.

    Arguments:
    N : number of diploid individuals (the gene pool holds 2*N allele copies)
    m : initial number of dominant copies, 0 <= m <= 2*N
    g : number of generations to simulate
    k : minimum number of recessive copies asked for

    Returns:
    The probability as a float.

    Raises:
    ValueError if m is outside 0..2*N.
    '''
    total = 2 * N
    if not 0 <= m <= total:
        raise ValueError('m must lie between 0 and 2*N')

    states = list(range(total + 1))

    # dist[j] = probability that the current generation holds j recessive copies
    dist = [0.0] * (total + 1)
    dist[total - m] = 1.0

    for _ in range(g):
        nxt = [0.0] * (total + 1)
        for j, mass in enumerate(dist):
            if mass == 0.0:
                continue
            if j == 0 or j == total:
                # Loss / fixation is absorbing: the count cannot change any more.
                nxt[j] += mass
                continue
            # Each of the 2*N copies of the next generation is recessive
            # with probability j / (2*N), independently of the others.
            row = binom.pmf(states, total, float(j) / total)
            for i in states:
                nxt[i] += mass * row[i]
        dist = nxt

    return float(sum(dist[max(k, 0):]))
Пример #5
0
def WFMD(N, m, g, k):
    '''
    The probability that in a population of N diploid individuals initially 
    possessing m copies of a dominant allele, we will observe after g 
    generations at least k copies of a recessive allele. Assume the 
    Wright-Fisher model.

    Arguments:
    N : number of diploid individuals (the gene pool holds 2*N allele copies)
    m : initial number of dominant copies, 0 <= m <= 2*N
    g : number of generations to simulate
    k : minimum number of recessive copies asked for

    Returns:
    The probability as a float.

    Raises:
    ValueError if m is outside 0..2*N.
    '''
    total = 2 * N
    if not 0 <= m <= total:
        raise ValueError('m must lie between 0 and 2*N')

    states = list(range(total + 1))

    # dist[j] = probability that the current generation holds j recessive copies
    dist = [0.0] * (total + 1)
    dist[total - m] = 1.0

    for _ in range(g):
        nxt = [0.0] * (total + 1)
        for j, mass in enumerate(dist):
            if mass == 0.0:
                continue
            if j == 0 or j == total:
                # Loss / fixation is absorbing: the count cannot change any more.
                nxt[j] += mass
                continue
            # Each of the 2*N copies of the next generation is recessive
            # with probability j / (2*N), independently of the others.
            row = binom.pmf(states, total, float(j) / total)
            for i in states:
                nxt[i] += mass * row[i]
        dist = nxt

    return float(sum(dist[max(k, 0):]))
Пример #6
0
def _calculate_ci(p,sigma,n):
    r"""Return all indices j and k that correspond to confidence interval
    of level sigma for percentile p*100 along with the respective
    confidence levels

    Arguments:
    p    : p-quantile e.g. F(m_p) = P(X < m_p) = p for 0 < p < 1
    sigma: confidence interval level
    n    : number of samples

    Returns:
    (j_selection,k_selection,confidence_levels), or None when no pair
    (j, k) reaches the level sigma

    We need to find j and k such that

    B_{n,p}(k-1)-B_{n,p}(j-1) \geq \sigma

    Therefore, we do an exhaustive search for all values of k and j
    and then keep only the pairs with the smallest k-j

    See Jean-Yves Le Boudec, Performance Evaluation of Computer and
    Communication Systems, EPFL Press, 2010

    """

    # cdf_values[i] = B_{n,p}(i); the index i already stands for j-1 and k-1.
    # The CDF is evaluated once per index and the pairwise table is built by
    # broadcasting instead of tiling two n-by-n index grids.
    cdf_values = binom.cdf(np.arange(0,n),n,p)
    # diff_Bk_Bj[row, col] = B(col) - B(row), with row <-> j-1 and col <-> k-1
    diff_Bk_Bj  = cdf_values[np.newaxis,:]-cdf_values[:,np.newaxis]
    j_all,k_all = np.where(diff_Bk_Bj >= sigma)  # We get too many of them
    if len(j_all) == 0:
        return None
    diff_k_j    = k_all-j_all  # Hence, find the minimum interval
    index_min_int = np.where(diff_k_j == diff_k_j.min())  # There might be several of them
    j_selection = j_all[index_min_int]+1  # j and k can range from 1 to n
    k_selection = k_all[index_min_int]+1
    confidence_levels = diff_Bk_Bj[j_selection-1,k_selection-1]
    # All confidence intervals and their confidence level
    return (j_selection,k_selection,confidence_levels)
Пример #7
0
def Compute_Binomial_Prob(Topic_List,Global_Topic_Count):
    """Compute p-values from a binomial distribution for a list of events.

    Each distinct topic in Topic_List is counted and compared against the
    frequency recorded for that same topic in Global_Topic_Count. Every
    value in Topic_List must be a key of Global_Topic_Count.

    Keywords:
    Topic_List -- list of topic labels, one entry per observed event
    Global_Topic_Count -- dictionary mapping each topic to its expected
                          frequency (used as the binomial success probability)

    Returns:
    List of dicts with keys 'name', 'obs', 'exp' and 'pVal', sorted by
    'obs' in descending order. 'exp' is int(len(Topic_List) * frequency).
    'pVal' is binom.cdf(obs, len(Topic_List), frequency) when exp >= obs,
    and 1 minus that value otherwise.

    Raises:
    KeyError if a topic in Topic_List is missing from Global_Topic_Count.
    """
    n_events = len(Topic_List)
    List_of_Topic_Dict = []
    for name, observed in Counter(Topic_List).items():
        # Look the frequency up by topic name; indexing the keys by loop
        # position pairs topics with unrelated frequencies.
        frequency = Global_Topic_Count[name]
        expected = int(n_events * frequency)
        lower_tail = binom.cdf(observed, n_events, frequency)
        List_of_Topic_Dict.append({
            'name': name,
            'obs': observed,
            'exp': expected,
            'pVal': lower_tail if expected >= observed else 1 - lower_tail,
        })
    return sorted(List_of_Topic_Dict, key=lambda x: x['obs'], reverse=True)
Пример #8
0
 def _cihs_1D(data, alpha):
     """Return a (lower, upper) pair interpolated from the sorted data.

     ``data`` must provide ``.compressed()`` (e.g. a numpy masked array);
     the compressed values are sorted before use. ``alpha`` is folded to
     ``min(alpha, 1 - alpha)``. An order-statistic index ``k`` is taken
     from the Binomial(n, 0.5) distribution, lowered by one when the
     coverage of ``[k - 1, n - k]`` falls below ``1 - alpha``, and the two
     limits are linear blends of neighbouring order statistics.
     """
     ordered = np.sort(data.compressed())
     size = len(ordered)
     alpha = min(alpha, 1 - alpha)

     def coverage(lo, hi):
         # Binomial(size, 0.5) mass between lo (exclusive) and hi (inclusive)
         return binom.cdf(hi, size, 0.5) - binom.cdf(lo, size, 0.5)

     k = int(binom._ppf(alpha / 2.0, size, 0.5))
     gk = coverage(k - 1, size - k)
     if gk < 1 - alpha:
         k -= 1
         gk = coverage(k - 1, size - k)
     gkk = coverage(k, size - k - 1)

     # Interpolation weight between the wider and the narrower interval
     weight = (gk - 1 + alpha) / (gk - gkk)
     lambd = (size - k) * weight / float(k + (size - 2 * k) * weight)

     lower = lambd * ordered[k] + (1 - lambd) * ordered[k - 1]
     upper = lambd * ordered[size - k - 1] + (1 - lambd) * ordered[size - k]
     return (lower, upper)
Пример #9
0
 def _cihs_1D(data, alpha):
     """Compute interpolated lower/upper limits from the sorted sample.

     ``data`` must provide ``.compressed()`` (e.g. a numpy masked array).
     ``alpha`` is replaced by ``min(alpha, 1 - alpha)``. The index ``k``
     comes from the Binomial(n, 0.5) quantile at ``alpha / 2`` and is
     reduced by one if the coverage between ``k - 1`` and ``n - k`` is
     below ``1 - alpha``. Returns a 2-tuple ``(lower, upper)``.
     """
     values = np.sort(data.compressed())
     n = len(values)
     alpha = min(alpha, 1 - alpha)
     target = 1 - alpha

     k = int(binom._ppf(alpha / 2., n, 0.5))
     outer = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
     if outer < target:
         # Coverage too small: widen the interval by one order statistic.
         k -= 1
         outer = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
     inner = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)

     ratio = (outer - 1 + alpha) / (outer - inner)
     lambd = (n - k) * ratio / float(k + (n - 2 * k) * ratio)
     complement = 1 - lambd

     return (lambd * values[k] + complement * values[k - 1],
             lambd * values[n - k - 1] + complement * values[n - k])
Пример #10
0
                train = int(line.replace('(', '').replace(',', '').split()[0])
                continue

            test = int(line.replace('(', '').replace(',', '').split()[0])

        elif line.startswith('Test score:'):
            if firstTestScore is None:
                firstTestScore = float(line.replace('Test score: ', ''))
                continue

            secondTestScore = float(line.replace('Test score: ', ''))
            assert (train is not None)
            assert (test is not None)
            assert (testScoreAll is not None)

            a_cdf = 1 - binom.cdf(secondTestScore * test, test, firstTestScore)
            if firstTestScore > secondTestScore:
                a_cdf = -a_cdf
            b_cdf = 1 - binom.cdf(secondTestScore * test, test, testScoreAll)
            if testScoreAll > secondTestScore:
                b_cdf = -b_cdf
            #print(a_cdf, b_cdf)
            arr.append([
                name, train, test, firstTestScore, secondTestScore,
                testScoreAll, a_cdf, b_cdf
            ])

            name = None
            train = None
            test = None
            firstTestScore = None
Пример #11
0
def INDC(n, k, p):
    '''
    Return the base-10 logarithm of P(X <= k) for X ~ Binomial(n, p).

    NOTE(review): an earlier description spoke of "at least k heads in 2n
    trials"; the code evaluates the lower tail over n trials -- confirm
    which one callers expect.
    '''
    return math.log(binom.cdf(k, n, p), 10)
Пример #12
0
def INDC(n, k, p):
    """Return log10 of the Binomial(n, p) CDF evaluated at k."""
    cumulative = binom.cdf(k, n, p)
    base = 10
    return math.log(cumulative, base)
Пример #13
0
def binomial(n, c, p):
    """Return P(X <= c) for X ~ Binomial(n, p)."""
    trials, threshold = n, c
    return binom.cdf(threshold, trials, p)
Пример #14
0
def binom_test_low(n, N, p):
    """Return the lower-tail probability P(X <= n) for X ~ Binomial(N, p)."""
    lower_tail = binom.cdf(n, N, p)
    return lower_tail
Пример #15
0
def INDC(n,k,p):
    '''
    Return log10 of P(X <= k), where X ~ Binomial(n, p).

    NOTE(review): the previous description mentioned "at least k heads in
    2n trials"; the code computes the lower tail over n trials -- confirm
    the intended quantity.
    '''
    lower_tail = binom.cdf(k,n,p)
    log_value = math.log(lower_tail,10)
    return log_value
Пример #16
0
def pbinom(successes, fail, prob):
    """
    Cumulative binomial probability P(X <= successes) over
    ``successes + fail`` trials with per-trial success probability ``prob``.
    """
    return binom.cdf(successes, successes + fail, prob)
def calculate_p1_index(n_success, n_attempts, chance_of_success):
    """Return P(X >= n_success) for X ~ Binomial(n_attempts, chance_of_success)."""
    strictly_below = binom.cdf(n_success-1, n_attempts, chance_of_success)
    return 1 - strictly_below
Пример #18
0
def result(a, b):
    """Return '-' or '+' depending on a Binomial(a + b + 1, 0.5) CDF at b.

    '-' is returned when the CDF value lies strictly between the
    module-level ``alpha`` and ``1 - alpha``; '+' otherwise.
    """
    tail = binom.cdf(b, a + b + 1, 0.5)
    inside = alpha < tail and tail < 1- alpha
    return '-' if inside else '+'
def calculate_p2_index(n_success, n_attempts, chance_of_success):
    """Return twice the smaller of the upper-tail and lower-tail values.

    The upper tail comes from ``calculate_p1_index``; the lower tail is
    ``binom.cdf(n_success, n_attempts, chance_of_success)``.
    """
    upper_tail = calculate_p1_index(n_success, n_attempts, chance_of_success)
    lower_tail = binom.cdf(n_success, n_attempts, chance_of_success)
    return 2 * min(upper_tail, lower_tail)
Пример #20
0
def binom_test_low(n, N, p):
    """Lower-tail binomial probability: P(X <= n) with N trials and success rate p."""
    observed, trials = n, N
    return binom.cdf(observed, trials, p)
Пример #21
0
def INDC(n,k,p):
    """Base-10 logarithm of P(X <= k) for X ~ Binomial(n, p)."""
    return math.log(binom.cdf(k,n,p),10)
Пример #22
0
def pbinom(successes, fail, prob):
    """
    Return P(X <= successes) for a binomial variable whose number of
    trials is the sum of the given 'successes' and 'failures'.
    """
    n_trials = successes + fail
    cumulative = binom.cdf(successes, n_trials, prob)
    return cumulative