Пример #1
0
def test_pmf_accuracy():
    """Compare accuracy of the probability mass function.

    Compare the results with the accuracy check proposed in [Hong2013]_,
    equation (15).

    Three binomial variables with random sizes (1..10) and random success
    probabilities (rounded to two decimals) are summed.  The pmf of that sum
    at a random point ``k``, obtained by explicit convolution of the three
    binomial pmfs, must agree with ``PoiBin(p).pmf(k)`` to ten decimals.
    """
    [p1, p2, p3] = np.around(np.random.random_sample(size=3), decimals=2)
    # ``np.random.random_integers`` is deprecated.  ``randint`` excludes its
    # upper bound, so 11 keeps the original inclusive range 1..10.
    [n1, n2, n3] = np.random.randint(1, 11, size=3)
    nn = n1 + n2 + n3
    # One success probability per Bernoulli trial, grouped by binomial.
    p = ([p1 for _ in range(n1)]
         + [p2 for _ in range(n2)]
         + [p3 for _ in range(n3)])
    b1 = binom(n=n1, p=p1)
    b2 = binom(n=n2, p=p2)
    b3 = binom(n=n3, p=p3)
    k = np.random.randint(0, nn + 1)
    # Convolution: j is the combined count of the first two binomials and
    # i the count of the first one, so the third contributes k - j.
    chi_bn = 0
    for j in range(0, k + 1):
        for i in range(0, j + 1):
            chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j)
    pb = PoiBin(p)
    chi_pb = pb.pmf(k)
    assert np.all(np.around(chi_bn, decimals=10) == np.around(chi_pb,
                                                              decimals=10))
Пример #2
0
def down_fade_gap_inner(N, tablefunc, target, tSNR_range, rSNR1_range, rSNR2, pa2=10**(-10)):
    """Return the first (tSNR, rSNR1) pair whose accumulated value is below ``target``.

    ``tSNR_range`` is scanned in the outer loop and ``rSNR1_range`` in the
    inner loop.  For every pair a sum over ``g = 1..N`` of binomially
    weighted terms is accumulated; accumulation stops early once the sum
    exceeds ``target``.

    Parameters
    ----------
    N : int
        Number of trials of the outer binomial and upper bound of ``g``.
    tablefunc : callable
        Called as ``tablefunc(rSNR1)``; its result is used as ``pa1``.
    target : float
        Threshold the accumulated sum is compared against.
    tSNR_range, rSNR1_range : iterable
        Candidate values; from the names presumably SNRs in dB (they are
        converted with ``10**(x/10)``) -- TODO confirm units.
    rSNR2 : number
        Fixed value used together with ``tSNR`` to derive ``pf2``.
    pa2 : float, optional
        Floor probability mixed into ``qE`` and ``qB``.

    Returns
    -------
    numpy.ndarray or None
        ``np.array([N, tSNR, rSNR1, rSNR2])`` for the first qualifying pair;
        ``None`` (implicitly) when no pair qualifies.
    """
    for tSNR in tSNR_range:
        h2 = 10**((rSNR2 - tSNR)/10)
        pf2 = 1 - np.exp(-h2)
        for rSNR1 in rSNR1_range:
            pa1 = tablefunc(rSNR1)
            h1 = 10**((rSNR1 - tSNR)/10) # linear fade
            pf1 = 1 - np.exp(-h1)
            pf2c = 1 - np.exp(h1-h2) if h2 > h1 else 0
            rv_g = binom(N, 1 - pf1)
            # The g = 0 term is deliberately left out of the sum.
            result = 0
            # ``range`` instead of the Python-2-only ``xrange``; the
            # iteration is identical on both interpreter lines.
            for g in range(1, N+1, 1):
                rv_a = binom(g, 1 - pa1)
                a_range = np.arange(0, g+1, 1)
                qpf2 = np.power(pf2, a_range)
                qE = qpf2 + (1-qpf2)*pa2
                qB = qpf2 * pf2c + (1 - qpf2*pf2c)*pa2
                psuccess = (1-qE)**(N-g) * np.power((1-qB), g-a_range)
                z = rv_g.pmf(g) * np.dot(rv_a.pmf(a_range), 1-psuccess)
                result += z
                if result > target:
                    # Already over the threshold; remaining terms only add.
                    break
            if result < target:
                return np.array([N, tSNR, rSNR1, rSNR2])
Пример #3
0
def show_binomial():
    """Show an example of binomial distributions.

    Plots the pmf of three binomial distributions (n=20/p=0.5, n=20/p=0.7,
    n=40/p=0.5) on a common axis, saves the figure through
    ``mystyle.printout_plain`` and shows it.
    """

    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)

    k = np.arange(40)

    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)

    markersize = 8
    # No ``plt.hold(True)``: that function was removed from matplotlib, and
    # successive ``plot`` calls draw onto the same axes by default.
    plt.plot(k, bd1.pmf(k), 'o-b', ms=markersize)
    plt.plot(k, bd2.pmf(k), 'd-r', ms=markersize)
    plt.plot(k, bd3.pmf(k), 's-g', ms=markersize)
    plt.title('Binomial distribuition')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()

    mystyle.printout_plain('Binomial_distribution_pmf.png')

    plt.show()
Пример #4
0
 def __init__(self, ngene, c=75, npool=4, epsilon=0.01,
              cutoffs=range(1,26), ntotal=4e+06, mutFac=3.):
     """Precompute per-cutoff probabilities from two binomial models.

     ``self.pmf`` is a frozen binomial with ``c`` trials and success
     probability ``(1 - mutFac * epsilon) / npool``; ``self.pmf2`` is a
     frozen binomial with ``c`` trials and success probability ``epsilon``.
     For every value in ``cutoffs``:

     * ``self.pFail`` holds ``self.pmf.cdf(cutoff - 1)``, i.e. P(X < cutoff);
     * ``self.lambdaError`` holds ``(ntotal / ngene)`` times
       ``self.pmf2.sf(cutoff - 1)``, i.e. scaled P(X >= cutoff).
     """
     # Both cdf() and sf() are evaluated one below the cutoff so the
     # boundary value itself falls on the sf side.
     below_cutoff = numpy.array(cutoffs) - 1
     self.pmf = stats.binom(c, (1. - mutFac * epsilon) / npool)
     self.pmf2 = stats.binom(c, epsilon)
     self.pFail = self.pmf.cdf(below_cutoff)
     per_gene = float(ntotal) / ngene
     self.lambdaError = per_gene * self.pmf2.sf(below_cutoff)
     self.ngene = ngene
Пример #5
0
def UpdateK(k_old_list,z_old_list,T,lambda_old_list,rou_list,u_list,phi_list,alpha_hat,iterNum,eta):
    """Run one accept/reject update of every ``k`` sequence in a list of chains.

    For chain ``ii`` and each ``t`` in ``range(T-1)`` a new value
    ``k_old[t] + d_k * epsilon`` is proposed, where ``d_k`` is +1 or -1 with
    equal probability and ``epsilon`` is drawn from
    ``geom(1/(1+z_old[t]))`` shifted by ``loc=-1`` (so it may be 0).  A
    negative proposal is rejected outright and leaves ``z`` unchanged.
    Otherwise the proposal is accepted with probability
    ``ap = min(1, weight(new) / weight(old))`` and the step size is adapted
    as ``z_old[t] + iterNum**(-eta) * (ap - alpha_hat)``.

    Parameters
    ----------
    k_old_list, z_old_list : list of sequences
        Current ``k`` values and step-size parameters, one sequence per chain.
    T : int
        ``T - 1`` positions are updated per chain.
    lambda_old_list, rou_list, u_list : sequences
        Per-chain scalars entering the weight.
    phi_list : list of sequences
        Per-chain sequences indexed with ``t`` and ``t + 1``.
    alpha_hat : float
        Value subtracted from ``ap`` in the step-size update (presumably
        the target acceptance rate -- TODO confirm).
    iterNum, eta : numbers
        The adaptation gain is ``iterNum**(-eta)``.

    Returns
    -------
    (k_new_list, z_new_list) : tuple of lists of lists
    """
    # The direction coin never changes, so build it once.
    direction = stats.binom(1,0.5)

    k_new_list = []
    z_new_list = []

    for ii, k_old in enumerate(k_old_list):
        # The inputs are only read, never mutated, so no copies are needed.
        z_old = z_old_list[ii]
        lambda_old = lambda_old_list[ii]
        rou = rou_list[ii]
        u = u_list[ii]
        phi = phi_list[ii]

        def weight(k, t):
            # Unnormalised weight of the value ``k`` at position ``t``.
            return (lambda_old/((1-rou)*u))**k*(phi[t]*lambda_old*rou/((1-rou)*u))**k*\
                phi[t+1]/(math.factorial(k)*spec.gamma(lambda_old+k))

        k_new = []
        z_new = []
        for t in range(T-1):
            d_k = 1 if direction.rvs(1)[0] == 0 else -1

            epsilon = stats.geom.rvs(1.0/(1+z_old[t]),loc=-1,size=1)
            k_new_temp = k_old[t]+d_k*epsilon[0]

            # step 3 and step 4
            if k_new_temp < 0:
                k_new.append(k_old[t])
                z_new.append(z_old[t])
                continue

            ap = min(1,weight(k_new_temp, t)/weight(k_old[t], t))

            # Accept the proposal with probability ``ap``.
            if stats.binom(1,ap).rvs(1)[0] == 1:
                k_new.append(k_new_temp)
            else:
                k_new.append(k_old[t])

            z_new.append(z_old[t]+ iterNum**(-1.0*eta)*(ap-alpha_hat))

        k_new_list.append(k_new)
        z_new_list.append(z_new)

    return  k_new_list, z_new_list
Пример #6
0
def get_yield_params(npool=4, c=75, epsilon=0.01, nmut=50, ngene=4200,
                     ntotal=4.64e+06, mutFac=3, **kwargs):
    """Build the two binomial read-count models and locate two cutoffs.

    Returns a 4-tuple ``(pmfMut, pmfErr, i, j)``:

    * ``pmfMut`` -- frozen binomial, ``c`` trials, success probability
      ``(1 - mutFac * epsilon) / npool``;
    * ``pmfErr`` -- frozen binomial, ``c`` trials, success probability
      ``epsilon``;
    * ``i`` -- ``find_cutoff(0, c, ...) + 1`` for the predicate
      ``pmfErr.sf(cut) * ntotal < 0.05``;
    * ``j`` -- ``find_cutoff(0, c, ...)`` for the predicate
      ``pmfMut.cdf(cut) * nmut > 0.05``.

    ``ngene`` and any extra keyword arguments are accepted but not used.
    """
    mutant_model = stats.binom(c, (1. - mutFac * epsilon) / npool)
    error_model = stats.binom(c, epsilon)

    def no_false_positives(cut):
        # Expected number of error-only hits above ``cut`` is below 0.05.
        expected_hits = error_model.sf(cut) * ntotal
        return expected_hits < 0.05

    def too_many_false_negatives(cut):
        # Expected number of mutants at or below ``cut`` exceeds 0.05.
        expected_misses = mutant_model.cdf(cut) * nmut
        return expected_misses > 0.05

    lower = find_cutoff(0, c, no_false_positives) + 1
    upper = find_cutoff(0, c, too_many_false_negatives)
    return mutant_model, error_model, lower, upper
Пример #7
0
def UpdateK(k_old,z_old,T,lambda_old,rou,u,phi,alpha,iterNum,eta):
    """Run one accept/reject update of the ``k`` sequence.

    For each ``t`` in ``range(T-1)`` a new value ``k_old[t] + d_k * epsilon``
    is proposed, where ``d_k`` is +1 or -1 with equal probability and
    ``epsilon`` is drawn from ``geom(1/(1+z_old[t]))``.  A negative proposal
    is rejected outright and leaves ``z`` unchanged.  Otherwise the proposal
    is accepted with probability ``ap = min(1, weight(new) / weight(old))``
    and the step size becomes ``z_old[t] + iterNum**(-eta) * (ap - alpha)``.

    The new sequences are printed and returned as ``(k_new, z_new)``.
    """
    # The direction coin never changes, so build it once.
    direction = stats.binom(1,0.5)

    def weight(k, t):
        # Unnormalised weight of the value ``k`` at position ``t``.
        return (lambda_old/((1-rou)*u))**k*(phi[t]*lambda_old*rou/((1-rou)*u))**k*\
            phi[t+1]/(math.factorial(k)*spec.gamma(lambda_old+k))

    k_new = []
    z_new = []
    for t in range(T-1):
        d_k = 1 if direction.rvs(1)[0] == 0 else -1

        epsilon = stats.geom(1.0/(1+z_old[t])).rvs(1)
        k_new_temp = k_old[t]+d_k*epsilon[0]

        # step 3
        if k_new_temp < 0:
            k_new.append(k_old[t])
            z_new.append(z_old[t])
            continue

        ap = min(1,weight(k_new_temp, t)/weight(k_old[t], t))

        # A Bernoulli(ap) draw of 1 means "accept".  The previous test
        # (== 0) kept the proposal with probability 1 - ap instead.
        if stats.binom(1,ap).rvs(1)[0] == 1:
            k_new.append(k_new_temp)
        else:
            k_new.append(k_old[t])

        # step 4
        z_new.append(z_old[t]+ iterNum**(-1.0*eta)*(ap-alpha))

    # Single-argument calls print the same text on Python 2 and Python 3.
    print("k z new:\n")
    print(k_new)
    print(z_new)
    print("\n")

    return  k_new, z_new
Пример #8
0
def UpdateK_sigma(k_sigma_old,z_sigma_old,lambda_sigma,rou_sigma,u_sigma,sigma_2,iterNum,eta,alpha,T):
    """Run one accept/reject update of the ``k_sigma`` sequence.

    For each ``t`` in ``range(T-1)`` a new value
    ``k_sigma_old[t] + d_k * epsilon`` is proposed, where ``d_k`` is +1 or -1
    with equal probability and ``epsilon`` is drawn from
    ``geom(1/(1+z_sigma_old[t]))``.  A negative proposal is rejected outright
    and leaves ``z`` unchanged.  Otherwise the proposal is accepted with
    probability ``ap = min(1, weight(new) / weight(old))`` and the step size
    becomes ``z_sigma_old[t] + iterNum**(-eta) * (ap - alpha)``.

    Returns
    -------
    (k_sigma_new, z_sigma_new) : tuple of lists, each of length ``T - 1``
    """
    # The direction coin never changes, so build it once.
    direction = stats.binom(1,0.5)

    def weight(k, t):
        # Unnormalised weight of the value ``k`` at position ``t``.
        return (lambda_sigma/((1-rou_sigma)*u_sigma))**k*\
            (sigma_2[t]*lambda_sigma*rou_sigma/((1-rou_sigma)*u_sigma))**k\
            *(sigma_2[t+1])**k/(math.factorial(k)*spec.gamma(lambda_sigma+k))

    k_sigma_new = []
    z_sigma_new = []

    for t in range(T-1):
        d_k = 1 if direction.rvs(1)[0] == 0 else -1

        epsilon = stats.geom(1.0/(1+z_sigma_old[t])).rvs(1)
        k_sigma_new_temp = k_sigma_old[t]+d_k*epsilon[0]

        # step 3
        if k_sigma_new_temp < 0:
            k_sigma_new.append(k_sigma_old[t])
            z_sigma_new.append(z_sigma_old[t])
            continue

        # step 2
        ap = min(1,weight(k_sigma_new_temp, t)/weight(k_sigma_old[t], t))

        # A Bernoulli(ap) draw of 1 means "accept".  The previous test
        # (== 0) kept the proposal with probability 1 - ap instead.
        if stats.binom(1,ap).rvs(1)[0] == 1:
            k_sigma_new.append(k_sigma_new_temp)
        else:
            k_sigma_new.append(k_sigma_old[t])

        # step 4
        z_sigma_new.append(z_sigma_old[t]+ iterNum**(-1.0*eta)*(ap-alpha))

    return  k_sigma_new,z_sigma_new
Пример #9
0
 def birth(M):
     """Return the value ``(N - M) / N * b`` for the state ``M``.

     ``N``, ``p`` and ``eta`` are read from the enclosing scope.  ``b`` is
     the sum over ``l = 0 .. N-M-1`` of ``pmf1(l) * tail(l + 1)``, where
     ``pmf1`` belongs to ``binom(N - M - 1, p * eta / N)`` and ``tail(j)`` is
     the sum of ``binom(M, (1 - p) * eta / N).pmf(m)`` over ``m = j .. M-1``
     (taken as 0 once ``j >= M``).  Returns 0 when ``M == 0``.
     """
     if M == 0:
         return 0
     first = binom(N - M - 1, p * eta / N)
     second = binom(M, (1 - p) * eta / N)
     # upper_tail[j] = second.pmf(j) + ... + second.pmf(M - 1); accumulating
     # on the reversed list builds the tail sums from the top down.
     masses = [second.pmf(m) for m in range(M)]
     upper_tail = list(np.cumsum(masses[::-1]))[::-1]
     total = 0.0
     for l in range(N - M):
         nxt = l + 1
         tail = upper_tail[nxt] if nxt < M else 0
         total += tail * first.pmf(l)
     return (N - M) / N * total
    def test_entropy(self):
        """Check ``binom(2, p).entropy()`` for p = 0.5 and the degenerate p = 0, 1."""
        # Fair case: compare against -sum(p * log(p)) over the known pmf.
        fair_pmf = np.array([0.25, 0.5, 0.25])
        reference = -sum(xlogy(fair_pmf, fair_pmf))
        assert_allclose(stats.binom(2, 0.5).entropy(), reference)

        # Degenerate cases: the entropy is expected to be exactly zero.
        for prob in (0.0, 1.0):
            assert_equal(stats.binom(2, prob).entropy(), 0.0)
Пример #11
0
def llr(c1, c2, c12, n):
    """Return the binomial log-likelihood ratio ``log L(H1) - log L(H0)``.

    Parameters
    ----------
    c1, c2 : int
        Counts used as described below (from the comments, presumably the
        occurrence counts of words w1 and w2 -- TODO confirm).
    c12 : int
        Joint count.
    n : int
        Total count.

    Both hypotheses score the same two observations with binomial
    log-pmfs: ``c12`` successes in ``c1`` trials and ``c2 - c12`` successes
    in ``n - c1`` trials.  H0 uses the single probability ``c2 / n`` for
    both; H1 uses ``c12 / n`` for the first and ``(c2 - c12) / n`` for the
    second.

    NOTE(review): textbook formulations of this test use ``c12 / c1`` and
    ``(c2 - c12) / (n - c1)`` under H1 -- confirm the ``/ n`` form is intended.
    """
    # Force true division so integer counts cannot truncate the
    # probabilities to 0 under Python 2 semantics.
    total = float(n)
    # H0: Independence p(w1,w2) = p(w1,~w2) = c2/N
    p0 = c2 / total
    # H1: Dependence, p(w1,w2) = c12/N
    p10 = c12 / total
    # H1: p(~w1,w2) = (c2-c12)/N
    p11 = (c2 - c12) / total
    # binomial log-probabilities
    # H0: b(c12; c1, p0),  b(c2-c12; N-c1, p0)
    # H1: b(c12, c1, p10), b(c2-c12; N-c1, p11)
    # Plain scalars replace the former ``np.matrix``, which NumPy
    # discourages for new code.
    log_h0 = binom(c1, p0).logpmf(c12) + binom(n - c1, p0).logpmf(c2 - c12)
    log_h1 = binom(c1, p10).logpmf(c12) + binom(n - c1, p11).logpmf(c2 - c12)
    return log_h1 - log_h0
Пример #12
0
def add_edges_exact(g, nodes, p, weighted=None,
                    links=None):
    """Add edges to ``g`` from a pool of candidate node pairs.

    The pool is ``links`` when given (``nodes`` must then be empty),
    otherwise every unordered pair of distinct ``nodes``.

    * ``weighted == 'exact'``: every candidate pair becomes an edge and
      ``p`` is appended to that edge's ``'weights'`` list.
    * otherwise: the number of edges is drawn from
      ``binom(len(pool), p)`` and that many pairs are sampled without
      replacement and added.

    Returns ``None``.
    """
    if links:
        assert not nodes
    else:
        links = {frozenset((a, b))
                 for a in nodes for b in nodes
                 if a != b}
        # Every unordered pair must appear exactly once.
        assert len(links) == len(nodes) * (len(nodes)-1) / 2

    if weighted == 'exact':
        for a, b in links:
            g.add_edge(a, b, weight=None)
            g.edge[a][b].setdefault('weights', []).append(p)
            # Both directions are expected to share one attribute dict.
            assert g.edge[b][a]['weights'] == g.edge[a][b]['weights']
        return

    candidates = list(links)
    n_chosen = binom(len(candidates), p).rvs()
    g.add_edges_from(random.sample(candidates, n_chosen))
Пример #13
0
def mcnemar_midp(b, c):
    """
    Compute McNemar's test using the "mid-p" variant suggested by:

    M.W. Fagerland, S. Lydersen, P. Laake. 2013. The McNemar test for
    binary matched-pairs data: Mid-p and asymptotic are better than exact
    conditional. BMC Medical Research Methodology 13: 91.

    `b` is the number of observations correctly labeled by the first---but
    not the second---system; `c` is the number of observations correctly
    labeled by the second---but not the first---system.

    The counts, the exact two-sided p-value, the mid-p value and the
    chi-square statistic ``(b - c)**2 / (b + c)`` are printed; nothing is
    returned.  With no discordant pairs (``b + c == 0``) the chi-square
    statistic is reported as ``nan`` instead of raising.
    """
    n = b + c

    x = min(b, c)
    dist = binom(n, .5)
    doubled_tail = 2. * dist.cdf(x)
    # The mid-p variant counts only half of the observed point probability
    # in each tail, i.e. subtracts it once from the doubled tail.
    midp = doubled_tail - dist.pmf(x)
    # Doubling the lower tail overshoots 1 when b == c; a p-value cannot.
    p = min(1., doubled_tail)
    chi = float(abs(b - c)**2)/n if n else float('nan')

    # Single-argument calls print the same text on Python 2 and Python 3.
    print("b =  %s" % (b,))
    print("c =  %s" % (c,))
    print("Exact p =  %s" % (p,))
    print("Mid p =  %s" % (midp,))
    print("Chi =  %s" % (chi,))
Пример #14
0
def UpdateLambda(lambda_star_old,tao_lambda_star_old,s_star,u_star,m,u,eta,alpha,iterNum):
    """Run one accept/reject update of ``lambda_star`` with an adaptive proposal.

    A candidate is proposed on the log scale from a normal distribution
    centred at ``log(lambda_star_old)`` with variance
    ``tao_lambda_star_old``.  It is accepted with probability
    ``ap = min(1, density(candidate) / density(lambda_star_old))``.  The
    proposal variance is then adapted on the log scale by
    ``iterNum**(-eta) * (ap - alpha)``.

    Parameters
    ----------
    lambda_star_old : float
        Current value (must be positive; its logarithm is taken).
    tao_lambda_star_old : float
        Current proposal variance (must be positive).
    s_star, u_star, m : numbers
        Constants of the unnormalised density below.
    u : iterable of numbers
        Values entering the density through their sum and through the
        product of ``item**lambda``.
    eta, alpha, iterNum : numbers
        Adaptation exponent, value subtracted from ``ap`` (presumably the
        target acceptance rate -- TODO confirm) and iteration counter.

    Returns
    -------
    (lambda_star_new, tao_lambda_star_new) : tuple
    """
    def density(lam):
        # Unnormalised density evaluated at ``lam``.
        temp =1.0
        for item in u:
            temp = temp*item**lam
        return np.exp(-1.0*lam/s_star)*(((lam**lam)/(u_star**lam*spec.gamma(lam)))**m)*np.exp(-1.0*lam/u_star*sum(u))*temp

    log_lambda_star_X = stats.norm(loc =np.log(lambda_star_old),scale =np.sqrt(tao_lambda_star_old))
    log_lambda_star = log_lambda_star_X.rvs(1)
    lambda_star = np.exp(log_lambda_star[0])

    p_lambda_star = density(lambda_star_old)
    p_lambda_star_new = density(lambda_star)
    ap = min(1,p_lambda_star_new/p_lambda_star)

    # step 3: keep the candidate when the Bernoulli(ap) draw succeeds
    y_AP = stats.binom(1,ap)
    lambda_star_new = lambda_star_old
    temp = y_AP.rvs(1)
    if temp[0] ==1:
        lambda_star_new = lambda_star

    # step 4: adapt the proposal variance on the log scale
    log_tao_lambda_star = np.log(tao_lambda_star_old)+iterNum**(-1.0*eta)*(ap-alpha)
    tao_lambda_star_new = np.exp(log_tao_lambda_star)

    # Single-argument call prints the same text on Python 2 and Python 3.
    print("finished a iteration.")
    return lambda_star_new,tao_lambda_star_new
Пример #15
0
def fig2():
    '''
        Plot a histogram of the simulated values together with the binomial,
        Poisson and normal curves derived from the probability returned by
        probability_cut_nooverlaps (the probability that we cut fibers).
        Compare with binomial distribution.
        Set n_sim >> 1

        Reads the module-level names spec, fib, bin_mean, bin_stdv, n_sim
        and v; draws into figure 2.
    '''
    figure( 2 )
    delta = 0.
    p = probability_cut_nooverlaps( spec.l_x, fib.lf, delta )

    rvb = binom( fib.n, p )
    rvp = poisson( fib.n * p )
    rvn = norm( fib.n * p, sqrt( fib.n * p * ( 1 - p ) ) )

    # Plot window: four standard deviations either side of the mean.
    graph_from = floor( bin_mean - 4 * bin_stdv )
    graph_to = floor( bin_mean + 4 * bin_stdv ) + 1

    x = arange( graph_from , graph_to )
    # The curves are scaled by n_sim so they are comparable with raw counts.
    plot( x, n_sim * rvb.pmf( x ), color = 'red', linewidth = 2, label = 'Binomial' )
    plot( x, n_sim * rvp.pmf( x ), color = 'green', linewidth = 2, label = 'Poisson' )
    plot( x, n_sim * rvn.pdf( x ), color = 'blue', linewidth = 2, label = 'Normal' )

    # The removed ``normed`` keyword is dropped; an unnormalised histogram
    # is the default.  The return value was never used.
    hist( v, n_sim )
    legend()
    draw()
Пример #16
0
 def death(M):
     """Return the value ``M / N * d`` for the state ``M``.

     ``N``, ``p`` and ``eta`` are read from the enclosing scope.  ``d`` is
     the sum over ``k = 0 .. M-1`` of ``pmf1(k) * tail(k + 1)``, where
     ``pmf1`` belongs to ``binom(M - 1, p * eta / N)`` and ``tail(j)`` is the
     sum of ``binom(N - M, (1 - p) * eta / N).pmf(l)`` over ``l = j .. N-M``
     (taken as 0 once ``j > N - M``).  Returns 0 when ``M == N``.

     A progress line is printed for every ``k``.
     """
     if M == N:
         return 0
     B1 = binom(M - 1, p * eta / N)
     B2 = binom(N - M, (1 - p) * eta / N)
     pmf1, pmf2 = B1.pmf, B2.pmf
     # p2s[j] = pmf2(j) + ... + pmf2(N - M); accumulating on the reversed
     # list builds the tail sums from the top down.
     p2 = [pmf2(l) for l in range(N - M + 1)]
     p2.reverse()
     d = 0.0
     p2s = list(np.cumsum(p2))
     p2s.reverse()
     for k in range(M):
         t = p2s[k + 1] if k + 1 < N - M + 1 else 0
         d += pmf1(k) * t
         # Single-argument call prints the same text on Python 2 and 3.
         print("%s %s %s %s %s" % (M, pmf1(k), k, t, d))
     # float() keeps the ratio from truncating to 0 when M and N are
     # integers under Python 2 division.
     return float(M) / N * d
Пример #17
0
def option_price(P, R, q):
    """Return the discounted binomial expectation of the last column of ``P``.

    Parameters
    ----------
    P : 2-D array
        Only its last column is used; it supplies ``n + 1`` values.
    R : number
        Per-step discount factor; the result is divided by ``R ** n``.
    q : float
        The weights are the pmf of ``binom(n, 1 - q)`` at ``0 .. n``.

    Returns
    -------
    float
        ``dot(weights, P[:, -1]) / R ** n``.
    """
    pv = P[:, -1]
    n = size(pv) - 1
    # ``pmf`` already accepts a sequence, so no ``vectorize`` wrapper is
    # needed.
    qv = binom(n, 1 - q).pmf(list(range(n + 1)))
    return dot(qv, pv) / R ** n
Пример #18
0
 def simulate_experiment(self, modelparams, expparams, repeat=1):
     """Draw binomial outcome counts for the given models and experiments.

     The probability of outcome 1 is obtained from
     ``self.underlying_model.likelihood``; counts are then drawn from
     ``binom(expparams['n_meas'], pr1)``, ``repeat`` times, and stacked
     along a new leading axis.  A single resulting value is returned
     unwrapped as ``os[0, 0, 0]``.
     """
     # FIXME: calling the superclass implementation here causes a slowdown,
     #        but it is needed to track simulation counts.
     #super(BinomialModel, self).simulate_experiment(modelparams, expparams)

     # Probability of outcome 1 under the underlying model.
     underlying_expparams = expparams['x'] if self._expparams_scalar else expparams
     pr1 = self.underlying_model.likelihood(
         np.array([1], dtype='uint'),
         modelparams,
         underlying_expparams)

     # n_meas is cast to a plain integer dtype before it reaches binom.
     dist = binom(expparams['n_meas'].astype('int'), pr1[0, :, :])

     def draw_once():
         # One repetition, shaped so repetitions stack along axis 0.
         if pr1.size != 1:
             return dist.rvs()[np.newaxis, :, :]
         return np.array([[[dist.rvs()]]])

     outcomes = np.concatenate([draw_once() for _ in range(repeat)], axis=0)
     if outcomes.size == 1:
         return outcomes[0,0,0]
     return outcomes
Пример #19
0
def _calc_score(
    fore_hit_size, fore_size, back_hit_size, back_size,
    prob_fn=None,
):
    if prob_fn is None:
        prob_fn = "hypergeom"

    assert prob_fn in ["hypergeom", "binom"]

    if back_hit_size <= 0:
        return 0

    k = fore_hit_size
    n = fore_size
    K = back_hit_size
    N = back_size
    p = K / N

    if prob_fn == "hypergeom":
        binomial = stats.hypergeom(N, K, n)
    else:
        binomial = stats.binom(n, p)

    pr_gt_k = binomial.sf(k - 1)
    pr_lt_k = binomial.cdf(k)

    if pr_lt_k <= 0:
        return -200
    elif pr_gt_k <= 0:
        return 200
    else:
        return -np.log10(pr_gt_k / pr_lt_k)
Пример #20
0
def show_binomial():
    """Plot the pmf of three binomial distributions, save and show the figure.

    Each distribution is drawn over its full support ``0 .. n`` and labelled
    with its parameters; an annotation marks x = 20.
    """
    # (n, p) pairs, drawn in this order.
    parameters = [(20, 0.5), (20, 0.7), (40, 0.5)]

    for n, p in parameters:
        support = np.arange(n+1)
        masses = stats.binom(n,p).pmf(support)
        plt.plot(support, masses, 'o--', label='p={0:3.1f}, n={1}'.format(p,n))

    plt.legend()

    plt.title('Binomial distribuition')
    plt.xlabel('X')
    plt.ylabel('P(X)')
    arrow = dict(shrink=0.05)
    plt.annotate('Upper Limit', xy=(20,0), xytext=(27,0.04),
                 arrowprops=arrow)

    mystyle.printout_plain('Binomial_distribution_pmf.png')
    plt.show()
Пример #21
0
def add_edges_out_sparse(g, p, g_layers, weighted=False,
                         edges_constructor=add_edges_exact,
                         non_overlapping=False):
    """Add random edges to ``g`` between nodes that share no community.

    The number of edges to add is drawn from ``binom(n_links, p)``, where
    ``n_links`` is the number of node pairs in ``g`` minus the pairs inside
    each community of ``g_layers[0]``.  Node pairs are then drawn uniformly
    with rejection (shared community, or edge already present) until that
    many edges were added.

    Only a single layer with non-overlapping communities is supported;
    anything else raises ``NotImplementedError``.  ``weighted``,
    ``edges_constructor`` and ``non_overlapping`` are accepted but not used.

    NOTE(review): the rejection loop does not terminate if fewer than the
    drawn number of admissible pairs are still free.
    """
    if len(g_layers) != 1:
        raise NotImplementedError("len(g_layers) > 1 in this function")
    cmtys = cmty.Communities.from_networkx(g_layers[0])
    if not cmtys.is_non_overlapping():
        raise NotImplementedError("overlapping in this function")
    # Materialise the nodes so ``random.choice`` can index them even when
    # ``g.nodes()`` returns a view instead of a list.
    nodes = list(g.nodes())
    # Total number of pairs of nodes.  Floor division is exact here (a
    # product of consecutive integers is even) and keeps the count an int.
    n_links = len(g)*(len(g)-1) // 2
    # Subtract the number of pairs inside communities.  ``.values()``
    # replaces the Python-2-only ``.iteritems()``; sizes are assumed to be
    # integer counts.
    n_links -= sum(s*(s-1)//2 for s in cmtys.cmtysizes().values())
    n_links_wanted = binom(n_links, p).rvs()
    n_links_present = 0
    node_dict = g_layers[0].node
    while n_links_present < n_links_wanted:
        a = random.choice(nodes)
        b = random.choice(nodes)
        # Skip pairs that share at least one community.
        if node_dict[a]['cmtys']&node_dict[b]['cmtys']:
            continue
        if g.has_edge(a, b):
            continue
        g.add_edge(a, b)
        n_links_present += 1
Пример #22
0
def test_pmf_pb_binom():
    """Compare the probability mass function with the binomial limit case."""
    def four_digits(value):
        # Truncate (not round) to four digits behind the decimal point.
        return int(value * 10000)

    # With equal probabilities p_j the Poisson Binomial distribution
    # reduces to the Binomial one, so pmf(0) must agree.
    equal_probs = [0.5, 0.5]
    poisson_binomial = PoiBin(equal_probs)
    binomial = binom(n=2, p=equal_probs[0])
    assert four_digits(binomial.pmf(0)) == four_digits(poisson_binomial.pmf(0))

    # With different probabilities p_j the two distributions differ.
    poisson_binomial = PoiBin([0.5, 0.8])
    binomial = binom(2, p=0.5)
    assert four_digits(binomial.pmf(0)) != four_digits(poisson_binomial.pmf(0))
def BootstrapFD(samp):
    """Yield bootstrap frequency counts derived from the sample ``samp``.

    The first yielded item is ``FreqDist(samp).values()``.  Every later
    item is the ``.values()`` of a ``FreqDist`` built from a resample of
    ``N`` draws: ``ns2`` of them (drawn from ``binom(N, 1 - C)``) come
    uniformly from ``f0`` labels ``B .. B + f0 - 1`` standing for unseen
    categories, and the remaining ``ns1`` come from the ``B`` observed
    categories with the adjusted probabilities ``probsA``.  The generator
    never terminates on its own.

    ``f1`` and ``f2`` are ``fd.Nr(1)`` and ``fd.Nr(2)``, ``N`` is ``fd.N()``
    and ``B`` is ``fd.B()``.
    """
    fd = FreqDist(samp)
    f1 = float(fd.Nr(1))
    f2 = float(fd.Nr(2))
    N = float(fd.N())
    B = fd.B()
    # Undetected species & Coverage
    if f2 > 0.0:
        f0 = ceil(((N - 1.0) / N) * (f1 ** 2.0) / (2.0 * f2))
        C = 1.0 - f1 / N * (N - 1.0) * f1 / ((N - 1.0) * f1 + 2.0 * f2)
    else:
        f0 = ceil(((N - 1.0) / N) * f1 * (f1 - 1.0) / 2.0)
        C = 1.0 - f1 / N * (N - 1.0) * f1 / ((N - 1.0) * f1 + 2.0)
    # Correct abundances
    probs = array(fd.values()) / N
    lambdah = (1 - C) / sum(probs * (1 - probs) ** N)
    probs = probs * (1 - lambdah * (1 - probs) ** N)
    yield fd.values()
    popO = arange(B)
    dist = binom(n=N, p=1 - C)
    probsA = probs / sum(probs)
    while True:
        # ns2 draws go to unseen categories, the rest to observed ones.
        ns2 = dist.rvs()
        ns1 = int(N) - ns2
        if ns1 > 0:
            samp1 = list(choice(popO, size=ns1, replace=True, p=probsA))
        else:
            # Previously this branch reset ``samp2``, leaving ``samp1``
            # undefined (first round) or stale (later rounds).
            samp1 = []
        if ns2 > 0:
            samp2 = list(random_integers(B, B + int(f0) - 1, ns2))
        else:
            samp2 = []
        yield FreqDist(samp1 + samp2).values()
Пример #24
0
def mcnemar_midp(b, c):
    """Return the mid-p value of McNemar's test for the counts ``b`` and ``c``.

    With ``X ~ binom(b + c, 0.5)`` and ``x = min(b, c)`` the result is
    ``2 * P(X <= x) - P(X == x)``.
    """
    discordant = b + c
    smaller = min(b, c)
    null_dist = binom(discordant, 0.5)
    doubled_tail = 2.0 * null_dist.cdf(smaller)
    return doubled_tail - null_dist.pmf(smaller)
Пример #25
0
def sample_histogram(hist, factor=2, trim=None):
    """Down-sample a histogram by keeping each item with probability ``1/factor``.

    Parameters
    ----------
    hist : dict
        Maps a value ``i`` to the number ``v`` of entries having that value.
    factor : number, optional
        Down-sampling factor; every unit is kept with probability
        ``1 / factor``.
    trim : number or None, optional
        Keys at or above the trim level are dropped before sampling.  When
        ``None`` the level is ``get_trim(hist)`` for more than 300 keys and
        ``max(hist)`` otherwise; when given it is
        ``min(max(hist), trim * factor)``.

    Returns
    -------
    dict
        Expected down-sampled histogram, stochastically rounded to integers,
        without entries that rounded to 0.

    For ``i < 100`` the distribution of the thinned value is the exact
    binomial ``binom(i, 1/factor)``; for larger ``i`` it is taken from
    ``poisson_dist(i / factor, i)``.  Outcome 0 is not recorded.
    """
    if trim is None:
        if len(hist) > 300:
            trim = get_trim(hist)
        else:
            trim = max(hist)
    else:
        trim = min(max(hist), trim * factor)

    hist = {k: v for k, v in hist.items() if k < trim}
    h = defaultdict(int)
    prob = 1.0 / factor

    for i, v in hist.items():
        if i < 100:
            b = binom(i, prob)
            probs = [b.pmf(j) for j in range(1, i + 1)]
        else:
            probs = poisson_dist(i * prob, i)

        # probs[j] is the probability of the thinned value j + 1.
        for j, p in enumerate(probs):
            h[j + 1] += v * p

    h = dict(h)
    for i, v in h.items():
        # Stochastic rounding: round up with probability equal to the
        # fractional part.  (``v - round(v)`` went negative for fractions
        # >= 0.5, so those values were always rounded down.)
        d = v - floor(v)
        h[i] = ceil(v) if random.random() < d else floor(v)
    return {k: v for k, v in h.items() if v > 0}   # remove 0 elements
def release_likelihood(amplitudes, available_vesicles, release_probability, mini_amplitude, mini_amplitude_stdev, measurement_stdev):
    """Return a measure of the likelihood that a synaptic response will have certain amplitude(s),
    given the state parameters for the synapse.
    
    Parameters
    ----------
    amplitudes : array
        The amplitudes for which likelihood values will be returned
    available_vesicles : int
        Number of vesicles available for release
    release_probability : float
        Probability for each available vesicle to be released
    mini_amplitude : float
        Mean amplitude of response evoked by a single vesicle release
    mini_amplitude_stdev : float
        Standard deviation of response amplitudes evoked by a single vesicle release
    measurement_stdev : float
        Standard deviation of response amplitude measurement errors
        
    Returns
    -------
    array
        One likelihood value per entry of *amplitudes*.
        
    For each value in *amplitudes*, we calculate the likelihood that a synapse would evoke a response
    of that amplitude. Likelihood is calculated as follows:
    
    1. Given the number of vesicles available to be released (nV) and the release probability (pR), determine
       the probability that each possible number of vesicles (nR) will be released using the binomial distribution
       probability mass function. For example, if there are 3 vesicles available and the release probability is
       0.1, then the possibilities are:
           vesicles released (nR)    probability
                                0    0.729
                                1    0.243
                                2    0.027
                                3    0.001
    2. For each possible number of released vesicles, calculate the likelihood that this possibility could
       evoke a response of the tested amplitude. This is calculated using the Gaussian probability distribution 
       function where µ = nR * mini_amplitude and σ = sqrt(mini_amplitude_stdev^2 * nR + measurement_stdev^2)
    3. The total likelihood is the sum of likelihoods for all possible values of nR.
    """
    amplitudes = np.array(amplitudes)
    
    release_prob = stats.binom(available_vesicles, release_probability)
    
    n_vesicles = np.arange(available_vesicles + 1)
    
    # probability of releasing n_vesicles given available_vesicles and release_probability
    p_n = release_prob.pmf(n_vesicles)
    
    # expected amplitude for n_vesicles
    amp_mean = n_vesicles * mini_amplitude
    
    # variances add: n independent single-vesicle responses plus the measurement error
    amp_stdev = (mini_amplitude_stdev**2 * n_vesicles + measurement_stdev**2) ** 0.5
    
    # distributions of amplitudes expected for n_vesicles;
    # rows follow amplitudes, columns follow n_vesicles
    amp_prob = p_n[None, :] * normal_pdf(amp_mean[None, :], amp_stdev[None, :], amplitudes[:, None])
    
    # sum all distributions across n_vesicles
    return amp_prob.sum(axis=1)
    def __init__(self, alpha, beta, states=(0, 1), l=None):
        """
        Set up a 2-state Markov chain from its two switching probabilities.

        Parameters
        ----------

        alpha, beta : float
            Switching probabilities: `alpha` for leaving the first state
            for the second, `beta` for leaving the second for the first.

        states : 2-tuple
            Labels of the first and second state (defaults to the integers
            0 and 1).  They are used as dict keys, so they must be hashable.

        l : int
            Length of simulated sample paths.  When given it is passed to
            path_length(); otherwise `self.length` is set to None.
        """
        first_label, second_label = states

        self.alpha = alpha
        self.beta = beta
        self.state_a, self.state_b = first_label, second_label  # state labels

        # Label -> index and index -> label lookups.
        self.s2int = {first_label : 0, second_label : 1}
        self.int2s = {0 : first_label, 1 : second_label}

        # Transition matrix in right-multiplying form: column = current
        # state, row = next state, so every column sums to 1.
        self.trans = array([[1.-alpha, beta], [alpha, 1.-beta]])

        # One sampler per current state.  The binomial "success" probability
        # is the probability that the next state is 1, i.e. row 1 of the
        # matrix.
        self.samplers = [stats.binom(1, self.trans[1, current]).rvs
                         for current in (0, 1)]

        # Equilibrium distribution.
        self.p0_eq = beta/(alpha + beta)
        self.p1_eq = alpha/(alpha + beta)

        if l is None:
            self.length = None
        else:
            self.path_length(l)
Пример #28
0
def show_binomial():
    """Show an example of binomial distributions.

    Plots the pmf of three binomial distributions (n=20/p=0.5, n=20/p=0.7,
    n=40/p=0.5) on a common axis and shows the figure.
    """

    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)

    k = np.arange(40)
    # No ``plt.hold(True)``: that function was removed from matplotlib, and
    # successive ``plot`` calls draw onto the same axes by default.
    plt.plot(k, bd1.pmf(k), 'o-b')
    plt.plot(k, bd2.pmf(k), 'd-r')
    plt.plot(k, bd3.pmf(k), 's-g')
    plt.title('Binomial distribition')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    plt.show()
Пример #29
0
 def assert_counts_are_ok(idx_counts, p):
     """Assert that every count in ``idx_counts`` is plausible under ``binom(n_splits, p)``.

     A count fails when its binomial probability mass is not above
     ``0.05 / n_splits``.  ``n_splits`` is read from the enclosing scope.
     """
     # Here we test that the distribution of the counts
     # per index is close enough to a binomial
     min_mass = 0.05 / n_splits
     reference = stats.binom(n_splits, p)
     for observed in idx_counts:
         mass = reference.pmf(observed)
         assert_true(mass > min_mass, "An index is not drawn with chance corresponding " "to even draws")
Пример #30
0
def xor_analysis_new(N, tSNR, rSNRdu, rSNR3, p_a1=10**(-9), p_a2=10**(-9), p_a3=10**(-9)):
    """Accumulate a probability over an exhaustive enumeration of nested binomial states.

    Returns 0 immediately when ``rSNRdu > tSNR``.  Otherwise the state
    counts ``gc``, ``ad``, ``ad_tilde``, ``ad_tilde_s`` and ``ad_hat_s`` are
    enumerated in nested loops; for every combination the product of the
    binomial state probabilities and the per-state factors is added to the
    result.

    Parameters
    ----------
    N : int
        Number of trials of the outermost binomial.
    tSNR, rSNRdu, rSNR3 : numbers
        Combined as ``10**((r - tSNR)/10)``; from the names presumably SNRs
        in dB -- TODO confirm.
    p_a1, p_a2, p_a3 : float, optional
        Small probabilities mixed into the link and failure terms.

    NOTE(review): ``rv_bu`` is built with ``ad`` trials although ``bu`` is
    computed on the line before it -- confirm which count is intended.
    """
    if rSNRdu > tSNR: return 0
    h_du = 10**((rSNRdu - tSNR)/10) # linear fade
    h_xor = 10**((rSNR3 - tSNR)/10)
    # Probability fade is bad
    p_f1 = p_f2 = 1 - np.exp(-h_du)
    p_f3 = 1 - np.exp(h_du-h_xor) if h_xor > h_du else 0

    # p_link_1 is computed but not used below.
    p_link_1 = p_f1 + (1 - p_f1) * p_a1
    p_link_2 = p_f2 + (1 - p_f2) * p_a2

    result = 0
    rv_gc = binom(N, 1 - p_f1)
    for gc in range(0, N+1):
        rv_ad = binom(gc, 1 - p_a1)
        for ad in range(0, gc+1):
            rv_ad_tilde = binom(ad, 1 - p_a2)
            bu = gc - ad
            rv_bu = binom(ad, 1-p_link_2)
            kad = np.arange(1, ad+1)
            # Falls back to 1 when p_f3 is 0.
            s_bu_tilde = (1 - p_f3) + p_f3 * np.dot(rv_bu.pmf(kad), 1-np.power(p_f3, kad)) if p_f3 else 1
            # s_bu_tilde = (1 - p_f3) + p_f3 * sum([rv_bu.pmf(k) * (1 - p_f3**k) for k in range(1, ad+1)]) if p_f3 else 1
            q_bu_tilde = (1 - s_bu_tilde) + s_bu_tilde * p_a3
            for ad_tilde in range(0, ad+1):
                rv_ad_tilde_s = binom(ad_tilde, 1 - p_f3)
                rv_ad_hat_s = binom(ad - ad_tilde, 1 - p_f3)
                # if p_f3 = 0 then ad_tilde_s should = ad_tilde because ad_tilde_i should be empty
                for ad_tilde_s in range(0 if p_f3 else ad_tilde, ad_tilde+1):
                    # ad_tilde already succeeded
                    # if p_f3 = 0 then ad_hat_s should = ad - ad_tilde because ad_hat_i should be empty
                    for ad_hat_s in range(0 if p_f3 else ad-ad_tilde, ad-ad_tilde+1):
                        ad_s = ad_tilde_s + ad_hat_s
                        ad_i = ad - ad_tilde_s - ad_hat_s # ad - ad_s
                        rv_ads = binom(ad_s, 1 - p_link_2)
                        rv_adi = binom(ad_i, 1 - p_link_2)
                        q_ad_hat_s = p_a3
                        q_ad_hat_i = p_link_2**ad_s + (1 - p_link_2**ad_s) * p_a3
                        # This is the problem zone that doesn't support vectorization (?)
                        ks, ki = np.arange(1, ad_s+1), np.arange(0, ad_i+1)
                        f_e = p_link_2**ad_s + (np.dot(rv_ads.pmf(ks), np.power(p_f3, ks)) * np.dot(rv_adi.pmf(ki), np.power(p_f3, ki)) if p_f3 else 0)
                        q_e = f_e + (1 - f_e) * (1 - (1 - p_a3)**2)
                        rv_bu_tilde = binom(bu, 1 - p_a2)
                        bu_tilde = np.arange(0, bu+1)
                        # The state probabilities are multiplied in log10 space and
                        # exponentiated again when ``res`` is formed.
                        log_pstates = np.log10(rv_gc.pmf(gc)) + np.log10(rv_ad.pmf(ad)) + np.log10(rv_ad_tilde.pmf(ad_tilde)) + np.log10(rv_ad_tilde_s.pmf(ad_tilde_s)) + np.log10(rv_ad_hat_s.pmf(ad_hat_s))
                        # Sum over all values of bu_tilde in one dot product.
                        bu_tilde_res = np.dot(rv_bu_tilde.pmf(bu_tilde), np.multiply(np.power(1 - q_e, N-ad-bu_tilde), np.power(1 - q_bu_tilde, bu_tilde)))
                        res = 10**log_pstates * (1 - q_ad_hat_s)**ad_hat_s * (1 - q_ad_hat_i)**(ad - ad_tilde - ad_hat_s) * bu_tilde_res
                        result += res
                        # print res, result
                        # print gc, ad, ad_tilde, ad_tilde_s, ad_hat_s, bu_tilde
                        # print "\n"
    return result
Пример #31
0
def forward_summing(P, player, alpha, beta, gamma):
    """
    Run a forward pass over a player's seasons with a two-state model.

    Args
    ----
    P : np.array
        position specific elite transition matrix, indexed as
        ``P[position, :, :]``
    player : pd.DataFrame.GroupBy
        Player-year groupby pandas dataframe; the columns
        ``position_main``, ``playerAge``, ``AB`` and ``HR`` are read
    alpha : np.array
        position specific intercepts, one per state
    beta : np.array
        park specific coef
    gamma : np.array
        position specific spline coefs

    Returns
    ------
    M
        array of shape ``(years, 2)``; every row after the first is
        normalised to sum to 1
    """

    n_years = len(player)

    # The first row is seeded from the transition matrix of the player's
    # first-year position.
    first_P = P[player.position_main[0], :, :]
    M = np.zeros((n_years, 2))
    M[0, :] = np.array([first_P[0, 1], first_P[1, 0]])

    for yr in range(1, n_years):
        pos = player.position_main[yr]

        # Propagate last year's row through this year's transition matrix.
        prior = M[yr - 1, :].dot(P[pos, :, :]).reshape(1, -1)

        at_bats = player.AB[yr]
        home_runs = player.HR[yr]
        trajectory = age_trajectory(player.playerAge[yr], gamma[pos])

        # Binomial likelihood of the observed home runs under each state.
        rates = [theta(alpha[pos, state], beta[pos], trajectory)
                 for state in (0, 1)]
        likelihoods = [stats.binom(n=at_bats, p=rate).pmf(k=home_runs)
                       for rate in rates]
        # An exact zero is replaced by a tiny constant so the
        # normalisation below cannot divide by zero.
        likelihoods = [1e-32 if lik == 0 else lik for lik in likelihoods]

        posterior = prior * np.array(likelihoods)
        M[yr, :] = posterior / posterior.sum()

    return M
 def expected_distn_unbiased(self):
     """Print the mean and standard deviation of ``binom(self.n, self.p)``."""
     reference = scs.binom(self.n, self.p)
     mu, sd = reference.mean(), reference.std()
     message = "The expected distribution for a fair coin is mu=%s, sd=%s"
     print(message % (mu, sd))
## Plot the binomial pmf for three parameterizations side by side, save
## the figure as "binomial.png" and show it.

## declare variables
font_size = 11
font_name = 'sans-serif'
# NOTE(review): this value is overwritten by the loop variable ``n`` below
# before it is ever read.
n = 10000
fig = plt.figure(figsize=(10, 6), dpi=300)
splot = 0

## loop through parameterizations of the binomial
for n, p in [(5, 0.25), (5, 0.5), (5, 0.75)]:
    splot += 1
    ax = fig.add_subplot(1, 3, splot)

    # Support between the 1% and 99% quantiles; ``arange`` excludes the
    # upper quantile itself.
    x = np.arange(scs.binom.ppf(0.01, n, p), scs.binom.ppf(0.99, n, p))
    ax.plot(x, scs.binom.pmf(x, n, p), 'bo', ms=8, label='pmf')
    ax.vlines(x, 0, scs.binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)
    # Frozen distribution; not used by the rest of this loop.
    rv = scs.binom(n, p)

    ax.set_ylim((0, 1.0))
    ax.set_xlim((-0.5, 4.5))
    ax.set_title("n=%s,p=%s" % (n, p))
    # Make every subplot square regardless of its data ranges.
    ax.set_aspect(1. / ax.get_data_ratio())

    for t in ax.get_xticklabels():
        t.set_fontsize(font_size - 1)
        t.set_fontname(font_name)
    for t in ax.get_yticklabels():
        t.set_fontsize(font_size - 1)
        t.set_fontname(font_name)

plt.savefig("binomial.png", dpi=400)
plt.show()
Пример #34
0
# Preview the two groups of columns used by the model below.
print(dta[[
    'NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK', 'PERHISP', 'PERMINTE'
]].head(10))

print(dta[[
    'AVYRSEXP', 'AVSALK', 'PERSPENK', 'PTRATIO', 'PCTAF', 'PCTCHRT', 'PCTYRRND'
]].head(10))

# Two-column response (NABOVE, NBELOW) with main effects and two
# three-way interaction blocks.
formula = 'NABOVE + NBELOW ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT '
formula += '+ PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF'

# #### Aside: Binomial distribution

# Toss a six-sided die 5 times, what's the probability of exactly 2 fours?

stats.binom(5, 1. / 6).pmf(2)

# Same value from the closed form.  ``comb`` is imported from
# ``scipy.special``; the ``scipy.misc`` alias was removed from SciPy.
from scipy.special import comb
comb(5, 2) * (1 / 6.)**2 * (5 / 6.)**3

from statsmodels.formula.api import glm
glm_mod = glm(formula, dta, family=sm.families.Binomial()).fit()

print(glm_mod.summary())

# The number of trials

glm_mod.model.data.orig_endog.sum(1)

# Fitted probabilities scaled by the number of trials per observation.
glm_mod.fittedvalues * glm_mod.model.data.orig_endog.sum(1)
def plotear(U, E, G, N, P, B, PS, H, EM):
    """Show one figure per sample, in a fixed order.

    Each argument is a sample that is plotted under the title of the
    distribution it is paired with below:

    U  -> uniform, E -> exponential, G -> gamma, N -> normal (continuous);
    P  -> Pascal, B -> binomial, PS -> Poisson, H -> hypergeometric (discrete);
    EM -> empirical.

    For uniform, exponential, normal, binomial and Poisson an "expected"
    reference is drawn as well (a KDE of 1000 fresh random draws for the
    continuous ones, the theoretical pmf for the discrete ones). Gamma,
    Pascal and hypergeometric are plain histograms. Every figure is shown
    with ``plt.show()`` before the next one is drawn.
    """
    expected_label = "Distribución esperada"
    observed_label = "Distribución observada"

    def draw_observed(sample):
        # Observed sample with black-edged histogram bars.
        sns.distplot(sample,
                     hist_kws={"edgecolor": "k"},
                     label=observed_label)

    def kde_versus_observed(reference_draws, sample, title):
        # KDE of the reference draws, then the observed sample on top.
        sns.kdeplot(reference_draws, label=expected_label)
        draw_observed(sample)
        plt.title(title)
        plt.legend(loc="upper left")
        plt.show()

    def plain_histogram(sample, title):
        # Histogram only; no reference curve and no legend.
        plt.title(title)
        plt.hist(sample, alpha=1, edgecolor='black')
        plt.show()

    # ---------------- continuous distributions ----------------
    kde_versus_observed(ss.uniform.rvs(size=1000, loc=1, scale=3),
                        U, "Distribución Uniforme")
    kde_versus_observed(ss.expon.rvs(size=1000, loc=0, scale=1),
                        E, "Distribución Exponencial")
    plain_histogram(G, "Distribución Gamma")
    kde_versus_observed(ss.norm.rvs(size=1000, loc=2.35, scale=30),
                        N, "Distribución Normal")

    # ---------------- discrete distributions ----------------
    plain_histogram(P, "Distribución Pascal")

    # Binomial: theoretical pmf (dashed) against the observed sample.
    trials, success_prob = 30, 0.4  # shape parameters
    binomial = ss.binom(trials, success_prob)
    support = np.arange(binomial.ppf(0.01), binomial.ppf(0.99))
    expected_pmf = binomial.pmf(support)  # probability mass function
    plt.plot(support, expected_pmf, '--', label=expected_label)
    draw_observed(B)
    plt.title("Distribución Binomial")
    plt.ylabel('probabilidad')
    plt.xlabel('valores')
    plt.legend(loc="upper left")
    plt.show()

    # Poisson: theoretical pmf against the observed sample.
    poisson = ss.poisson(3.6)
    poisson_support = np.arange(poisson.ppf(0.01), poisson.ppf(0.99))
    poisson_pmf = poisson.pmf(poisson_support)
    plt.plot(poisson_support, poisson_pmf, label=expected_label)
    draw_observed(PS)
    plt.title("Distribución de Poisson")
    plt.legend(loc="upper left")
    plt.show()

    plain_histogram(H, "Distribución Hipergeometrica")

    # Empirical: observed sample only, shown without a legend.
    draw_observed(EM)
    plt.title("Distribucion Empirica")
    plt.show()
Пример #36
0
                           num_workers=config.opts.cpus,
                           shuffle=shuffle,
                           pin_memory='cuda' in str(config.opts.device),
                           worker_init_fn=lambda _: np.random.seed(
                               int(torch.initial_seed()) % (2**32 - 1)))


dataset_train: NodeCountDataset = torch.load(folder_data / 'train.pt')
dataset_val: NodeCountDataset = torch.load(folder_data / 'val.pt')
dataset_test: NodeCountDataset = torch.load(folder_data / 'test.pt')
dataloader_train = make_dataloader(dataset_train, shuffle=True)
dataloader_val = make_dataloader(dataset_val, shuffle=False)
dataloader_test = make_dataloader(dataset_test, shuffle=False)

if dataset_train.informative_features > 0:
    # Frozen binomial with n = max_nodes and p = 0.5 ** informative_features.
    binom = stats.binom(
        n=dataset_train.max_nodes,
        p=.5**dataset_train.informative_features,
    )

    def weight_fn(targets):
        """Return the negative log-pmf of each target under `binom`."""
        log_probs = binom.logpmf(targets.cpu().numpy())
        # new_tensor builds the result from `targets`, as the original call did.
        return targets.new_tensor(-log_probs)
else:

    def weight_fn(targets):
        """Return a tensor of ones shaped like `targets`."""
        return torch.ones_like(targets)


epoch_bar_postfix = {}
epoch_start = train_state.epochs + 1
epoch_end = train_state.epochs + 1 + config.training.epochs
epoch_bar = tqdm.trange(epoch_start,
                        epoch_end,
                        desc='Training',
Пример #37
0
def pipeline(ax, model, n, lo, value, threshold):
    """Plot the model, compute the p-value of `value`, and print the verdict.

    Prints 'Reject Null Hypothesis' when the p-value returned by
    ``calc_p_value(model, value)`` is strictly below `threshold`, otherwise
    'Fail to Reject Null Hypothesis'. Returns nothing.
    """
    plot_model(ax, model, n, lo, value)
    verdict = ('Reject Null Hypothesis'
               if calc_p_value(model, value) < threshold
               else 'Fail to Reject Null Hypothesis')
    print(verdict)


if __name__ == "__main__":
    # Part 2

    # 1. Null hypothesis: Muriel cannot tell whether the water was poured
    # before or after the milk. Binomial model. Failed to reject the null,
    # p = 30.4%.

    binomial = stats.binom(n=137, p=0.5)

    fig, ax = plt.subplots()
    pipeline(ax, binomial, 137, 40, 72, 0.05)

    # 2. Null hypothesis: we haven't progressed on our heelflip capability.
    # Rejected the null, p = 2.8%.

    n = 122
    value = 72
    p = 0.5
    threshold = .05

    binomial = stats.binom(n=n, p=p)

    fig, ax = plt.subplots()
Пример #38
0
from scipy.stats import binom
# Four trials, each a hit with probability 4/5.
n, p = 4, 4 / 5
rv = binom(n, p)

# More than 2 hits: P(X > 2) = P(X = 3) + P(X = 4)
P0 = sum(rv.pmf(hits) for hits in (3, 4))
print('%.3f' % P0)

# At least 3 misses means at most 1 hit: P(X <= 1) = P(X = 0) + P(X = 1)
P1 = sum(rv.pmf(hits) for hits in (0, 1))
print('%.3f' % P1)
# Empirical probabilities from cars_x: the mean of a boolean mask is the
# fraction of entries satisfying the condition.
cars_x.value_counts()
p_no_cars_x = (cars_x == 0).mean()
p_at_least_3_x = (cars_x >= 3).mean()
p_at_least_1_x = (cars_x >= 1).mean()

# Normal(mean=3, sd=.3). isf(q) is the value exceeded with probability q,
# ppf(q) the value with probability q below it.
grades = stats.norm(3, .3)
top_5 = grades.isf(.05)
third_dec_top = grades.isf(.7)
third_dec_bot = grades.ppf(.2)

# The same cut-offs estimated from one million simulated draws.
grades_x = .3 * np.random.randn(1000000) + 3
top_5_x = np.percentile(grades_x, 95)
third_dec_top_x = np.percentile(grades_x, 30)
third_dec_bot_x = np.percentile(grades_x, 20)

# Binomial(4326, .02); sf(96) = P(X > 96) = P(X >= 97).
clicks = stats.binom(4326, .02)
at_least_97 = clicks.sf(96)

# Simulated counterpart of at_least_97.
clicks_x = np.random.binomial(4326, .02, 1000000)
at_least_97_x = (clicks_x >= 97).mean()

# P(at least one success in 60 trials at p = .01): sf(0) = P(X > 0).
in_first_60 = stats.binom(60, .01).sf(0)

in_first_60_x = (np.random.binomial(60, .01, 1000000) >= 1).mean()

# round(.9 * 22 * 3) trials at p = .03 each.
n_visitors = round(.9 * 22 * 3)
clean_up = stats.binom(n_visitors, .03)
# sf(0) = P(X >= 1); cdf(0) = P(X = 0), raised to the 2nd / 5th power for
# that many independent repetitions.
p_day_clean = clean_up.sf(0)
p_2_unclean = clean_up.cdf(0)**2
p_week_unclean = clean_up.cdf(0)**5
Пример #40
0
def get_p_value(n, p, x):
    """Return a ratio of binomial CDF differences for `x`, or 1 when x >= n/2.

    With F the CDF of ``st.binom(n=n, p=p)``, for ``x < n / 2`` the result is

        (F(x) - F(4) + F(n-5) - F(n-x-1)) / (F(n-5) - F(4))

    The cut points 4 and n-5 are fixed in the formula.
    """
    cdf = st.binom(n=n, p=p).cdf
    if x < n / 2:
        numerator = cdf(x) - cdf(4) + cdf(n - 5) - cdf(n - x - 1)
        denominator = cdf(n - 5) - cdf(4)
        return numerator / denominator
    return 1
Пример #41
0
sns.set_style("whitegrid")

# Rows vary n, columns vary p.
n_params = [1, 2, 4]
p_params = [0.25, 0.5, 0.75]

# Common x grid 0..max(n) so every panel shares the same axis.
x = np.arange(0, max(n_params) + 1)

fig, ax = plt.subplots(len(n_params), len(p_params), sharex=True, sharey=True)

for i, n in enumerate(n_params):
    for j, p in enumerate(p_params):
        y = stats.binom(n=n, p=p).pmf(x)

        panel = ax[i, j]
        panel.vlines(x, 0, y, colors="b", lw=5)
        panel.set_ylim(0, 1)
        # Invisible point whose only job is to carry the legend text.
        panel.plot(0, 0, label="n = {:3.2f}\np={:3.2f}".format(n, p), alpha=0)
        panel.legend(fontsize=12)

# Label only the middle-bottom and middle-left panels.
ax[2, 1].set_xlabel(r"$\theta$", fontsize=14)
ax[1, 0].set_ylabel(r"$p(y|\theta)$", fontsize=14)
ax[0, 0].set_xticks(x)


plt.tight_layout()

plt.savefig("plots/binomial_distribution.png")
Пример #42
0
import distfit
# print(distfit.__version__)
# print(dir(distfit))

# %% Multiple distributions as input
from distfit import distfit
# 10000 draws from a normal with mean 0 and standard deviation 2.
X = np.random.normal(0, 2, 10000)
# NOTE(review): y is not used anywhere in the visible part of this script.
y = [-8, -6, 0, 1, 2, 3, 4, 5, 6]
# Candidate distributions are restricted to norm, t and gamma.
dist = distfit(stats='RSS', distr=['norm', 't', 'gamma'])
results = dist.fit_transform(X)

# %% Discrete example
from distfit import distfit
from scipy.stats import binom
# Generate random numbers: 1000 draws from Binomial(8, 0.5)
X = binom(8, 0.5).rvs(1000)

dist = distfit(method='discrete', f=1.5, weighted=True, stats='wasserstein')
model = dist.fit_transform(X, verbose=3)
dist.plot()

# Make prediction
results = dist.predict([0, 1, 10, 11, 12])
dist.plot()

# Generate samples from the fitted model, refit on them, and predict again
Xgen = dist.generate(n=1000)
dist.fit_transform(Xgen)
results = dist.predict([0, 1, 10, 11, 12])
dist.plot()
Пример #43
0
 def func(self, x):
     """Return the pmf of ``binom(self.n, self.p)`` evaluated at `x`."""
     return binom(self.n, self.p).pmf(x)
Пример #44
0
    def _reset_distribution(self):
        """Rebuild ``self._distribution`` from the current ``self._n`` and ``self._p``."""

        self._distribution: rv_discrete = binom(self._n, self._p)
Пример #45
0
 def __init__(self, num_trials, probability):
     """Store the parameters and derived moments of a binomial distribution.

     Args:
         num_trials: number of trials, passed as the first argument to
             ``stats.binom``.
         probability: per-trial probability, passed as the second argument.
     """
     self.num_trials = num_trials
     self.probability = probability
     # Mean of the binomial: n * p.
     self.expectation = num_trials * probability
     # sqrt(n * p * (1 - p)), computed from the stored expectation.
     self.standard_deviation = math.sqrt(self.expectation * (1 - probability))
     self._binomial = stats.binom(num_trials, probability)
Пример #46
0
def p_y_theta(y, n, theta):
    """Return the pmf of ``binom(n, theta)`` evaluated at `y`."""
    return binom(n, theta).pmf(y)
Пример #47
0
# Bernoulli distribution

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.bernoulli.html

bernoulli_rv = sts.bernoulli(0.7)

# Draw 300 samples.
b = bernoulli_rv.rvs(300)

# Gap between the observed sum and 300 * 0.7, divided by the number of draws.
print(abs(np.sum(b) - 300 * 0.7) / 300)

# Binomial distribution

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binom.html

binomial_rv = sts.binom(20, 0.9)

# 21 evenly spaced points from 0 to 20, i.e. the integers 0..20 as floats.
x = np.linspace(0, 20, 21)
cdf = binomial_rv.cdf(x)
plt.step(x, cdf)

plt.ylabel('$F(x)$')
plt.xlabel('$x$')

# NOTE(review): no new figure is created between the CDF and pmf plots, so
# run as a plain script both land on the same axes and the labels below
# replace the ones above; this looks like it was written as notebook cells.
pmf = binomial_rv.pmf(x)
plt.plot(x, pmf, 'o')

plt.ylabel('$P(X=x)$')
plt.xlabel('$x$')

# Poisson distribution
Пример #48
0
# Define the distribution parameters to be plotted
n_values = [40, 4]
b_values = [0.35, 0.35]
linestyles = ['-', '--']
color = ['red', 'blue']
color2 = ['yellow', 'green']
x = np.arange(-1, 200)

#------------------------------------------------------------
# plot the distributions
fig, ax = plt.subplots(figsize=(5, 3.75))

for (n, b, ls, col, col2) in zip(n_values, b_values, linestyles, color,
                                 color2):
    # create a binomial distribution
    dist = binom(n, b)
    poi = poisson(n * b)
    mu = dist.mean()
    std = dist.std()
    mu2 = poi.mean()
    std2 = poi.std()

    plt.plot(x,
             dist.pmf(x),
             ls=ls,
             c=col,
             label=r'$b=%.2f,\ n=%i$' % (b, n),
             linestyle='steps-mid')
    plt.plot(x,
             poi.pmf(x),
             ls=ls,
Пример #49
0
 def test_inf_domain(self, domain):
     """Check that DiscreteAliasUrn raises ValueError matching "must be finite" for `domain`."""
     with pytest.raises(ValueError, match=r"must be finite"):
         DiscreteAliasUrn(stats.binom(10, 0.2), domain=domain)
Пример #50
0
# Summary statistics derived from the running sum accumulated earlier.
sampleVariance = runningSum / N

MEAN = P
STANDARD_DEVIATION = math.sqrt(sampleVariance)
MEDIAN = 1

data = {
    "Sample Mean": MEAN,
    "Sample Variance": sampleVariance,
    "Sample Standard Deviation": STANDARD_DEVIATION,
    "Sample Median": MEDIAN,
    "N": N,
    "P": P
}
writeJsonFile('reports/statistics.json', data)

from scipy.stats import binom
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Binomial(N, P) has support 0..N inclusive, so the grid needs N + 1 points;
# range(N - 1) silently dropped the outcomes N - 1 and N.
x = range(N + 1)
rv = binom(N, P)
print(N, P)
# Fair-coin baseline; built here but not plotted.
expected_rb = binom(N, 0.5)
# The label gives ax.legend() an artist to list; without it the legend is
# empty and matplotlib warns.
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=10,
          label='binom pmf')
ax.legend(loc='best', frameon=False)
plt.suptitle('Probablity of Explicitily Typed Languages')
plt.ylabel("pmf")
plt.xlabel("# of projects that are explicitly typed")
plt.show()
Пример #51
0
#!/usr/bin/env python

from __future__ import print_function
import argparse
import sys
import math
from scipy.stats import binom

if __name__ == "__main__":
    # For every input line whose first field is an integer n, print the
    # space-separated values log10(P(X > i)) for i = 0 .. 2n - 1, where
    # X ~ Binomial(2n, 0.5). Input is a file argument or stdin.
    parser = argparse.ArgumentParser()
    parser.add_argument('infile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)
    # NOTE(review): --nalleles is parsed but never read; the factor 2 below
    # is hard-coded. Confirm whether it was meant to replace that factor.
    parser.add_argument('-n', '--nalleles', type=int, default=2)
    args = parser.parse_args()

    with args.infile as f:
        # Stream the input instead of loading every line up front.
        for line in f:
            items = line.split()
            if not items:
                # Blank lines used to crash on items[0]; skip them.
                continue

            n = int(items[0])

            # Freeze the distribution once per line and reuse it for every i.
            dist = binom(2 * n, 0.5)
            bsf = [math.log10(dist.sf(i)) for i in range(2 * n)]
            print(" ".join(str(r) for r in bsf))
Пример #52
0
# `hg` here is whatever distribution was built before this fragment.
a = hg.cdf(0)
print("a:", a)

M = 25
n = 3
N = 1
hg = stats.hypergeom(M, n, N)
# cdf(1) - cdf(0) = P(X = 1)
b = hg.cdf(1) - hg.cdf(0)
print("b:", b)

# Exercise 5.37 ----------------------------------------
print("\nEJERCICIO 5.37 *************************************")

n = 3
p = 3 / 25
bi = stats.binom(n, p)
# cdf(0) = P(X = 0)
a = bi.cdf(0)
print("a:", a)

n = 3
p = 1 / 25
bi = stats.binom(n, p)
# cdf(3) - cdf(0) = P(1 <= X <= 3)
b = bi.cdf(3) - bi.cdf(0)
print("b:", b)

# Exercise 5.39
print("\nEJERCICIO 5.39 *************************************")
n = 10
p = .5
bi = stats.binom(n, p)
# 1 - cdf(2) = P(X > 2)
a = 1 - bi.cdf(2)
Пример #53
0
# NOTE(review): P is printed as the probability of exactly 5 'D' answers,
# yet below it is passed to stats.binom as the per-trial success
# probability, while the built-in check uses 1 / 5. Confirm which value the
# plots are meant to use.
print("Original probablity for exact 5 'D':", P)
# NOTE(review): 50 is hard-coded here while the rest of the block uses
# Number_of_question; verify they agree.
print("Probablity using built in func", stats.binom.pmf(5, 50, 1 / 5))
fig, ax = plt.subplots(1, 1)
# Mean, variance, skewness and kurtosis of Binomial(Number_of_question, P).
mean, var, skew, kurt = stats.binom.stats(Number_of_question,
                                          P,
                                          moments='mvsk')

# Integer grid from the 1% quantile up to (excluding) the 99% quantile.
x = np.arange(stats.binom.ppf(0.01, Number_of_question, P),
              stats.binom.ppf(0.99, Number_of_question, P))
# pmf as blue markers ...
ax.plot(x,
        stats.binom.pmf(x, Number_of_question, P),
        'bo',
        ms=8,
        label='binom pmf')
# ... on top of translucent blue stems.
ax.vlines(x,
          0,
          stats.binom.pmf(x, Number_of_question, P),
          colors='b',
          lw=5,
          alpha=0.5)
# Same pmf again via the frozen distribution, drawn as thin black stems.
rv = stats.binom(Number_of_question, P)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='-',
          lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()
Пример #54
0
'''

                            Online Python Compiler.
                Code, Compile, Run and Debug python program online.
Write your code in this editor and press "Run" button to execute it.

'''

import math

from scipy import stats
# X ~ Binomial(6, 2/3)
X = stats.binom(6, 2 / 3)
print(
    "Probability that in the next 6 trials, there will be atleast 4 successes")
# P(X >= 4) = P(X = 4) + P(X = 5) + P(X = 6)
print(sum(X.pmf(successes) for successes in (4, 5, 6)), "\n")
# A marketing website has an average click-through rate of 2%.
# One day they observe 4326 visitors and 97 click-throughs.
# How likely is it that this many people or more click through?
'''

mean click through rate = .02

observed 4326 visitors 

'''

n = 4326

prob = .02

dist = binom(n, prob)

# "This many or more" is P(X >= 97). sf(k) is P(X > k), so the argument has
# to be 96; sf(97) would leave out the observed count of 97 itself.
p_at_least_97 = dist.sf(96)
p_at_least_97

# Simulation: 10_000 simulated days (rows) of 4326 visitors (columns) each.
trials = rows = 10_000

samples = cols = 4326

# Uniform draws on [1, 101); a draw below 3 has probability 2/100 = .02 and
# counts as a click-through. This allocates rows * cols (about 43 million)
# floats at once.
data = np.random.uniform(1, 101, samples * trials).reshape(rows, cols)

data = pd.DataFrame(data)

# Fraction of simulated days with at least 97 click-throughs.
(((data < 3).sum(axis=1)) >= 97).sum() / trials

# You are working on some statistics homework consisting of 100 questions
# where all of the answers are a probability rounded to the hundreths place.
Пример #56
0
 def sample(self, N=None):
     """Draw from ``binom(self.n, self.p)``; `N` is forwarded as the size and ``self.random`` as `random_state`."""
     return binom(self.n, self.p).rvs(N, random_state=self.random)
Пример #57
0
def peak_plot(peak,
              sample_table=None,
              max_dist=None,
              norm_on_center=True,
              log_y=True,
              marker_list=None,
              color_list=None,
              guidelines=None,
              guideline_colors=None,
              legend_off=False,
              legend_col=2,
              ax=None,
              figsize=None,
              save_fig_to=None):
    """Plot the distribution of spike_in peak
    Plot a scatter-line plot of [adjusted] number of sequences with i edit distance from center sequence (spike-in seq)

    Args:
        peak (Peak): a Peak instance
        sample_table (pd.DataFrame): abundance of sequence in samples. With samples as columns. If None, try `peak.seqs`
        max_dist (int): maximum distance to survey. If None, try `peak.radius`
        norm_on_center (bool): if the counts/abundance are normalized to the peak center
        log_y (bool): if set the y scale as log
        marker_list (list of str): overwrite default marker scheme if not `None`, same length and order as
          samples in sample_table
        color_list (list of str): overwrite default color scheme if not `None`, same length and order as
          samples in sample_table
        guidelines (float or list of float): add a series of guidelines of the peak shape with certain mutation
          rates, optional. A single number is treated as a one-element list
        guideline_colors (list of color): the color of guidelines, same shape as guidelines
        legend_off (bool): do not show the legend if True
        legend_col (int): number of col for legend if show
        ax (matplotlib.Axis): if use external ax object to plot. Create a new figure if None
        figsize (2-tuple): size of the figure
        save_fig_to (str): save the figure to file if not None

    Returns:
        ax for plotted figure

    Raises:
        ValueError: if `sample_table` is None and `peak.seqs` is not a DataFrame, or if `max_dist` is None
          and `peak.radius` is None
        Exception: if `marker_list` or `color_list` does not have one entry per column of `sample_table`
    """

    import numpy as np

    if sample_table is None:
        if isinstance(peak.seqs, pd.DataFrame):
            sample_table = peak.seqs
        else:
            logging.error('Please indicate sample_table')
            raise ValueError('Please indicate sample_table')

    if max_dist is None:
        if peak.radius is None:
            logging.error('Please indicate the maximum distance to survey')
            raise ValueError('Please indicate the maximum distance to survey')
        else:
            max_dist = peak.radius

    if marker_list is None:
        marker_list = Presets.markers(num=sample_table.shape[1],
                                      with_line=True)
    elif len(marker_list) != sample_table.shape[1]:
        logging.error(
            'Error: length of marker_list does not align with the number of valid samples to plot'
        )
        raise Exception(
            'Error: length of marker_list does not align with the number of valid samples to plot'
        )
    if color_list is None:
        color_list = Presets.color_tab10(num=sample_table.shape[1])
    elif len(color_list) != sample_table.shape[1]:
        logging.error(
            'Error: length of color_list does not align with the number of valid samples to plot'
        )
        raise Exception(
            'Error: length of color_list does not align with the number of valid samples to plot'
        )

    if ax is None:
        if figsize is None:
            # Extra width leaves room for the legend drawn outside the axes.
            figsize = (max_dist / 2, 6) if legend_off else (max_dist / 2 + 5,
                                                            6)
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    rel_abun, _ = peak.peak_abun(max_radius=max_dist,
                                 table=sample_table,
                                 use_relative=norm_on_center)

    for sample, color, marker in zip(sample_table.columns, color_list,
                                     marker_list):
        ax.plot(rel_abun.index,
                rel_abun[sample],
                marker,
                color=color,
                label=sample,
                ls='-',
                alpha=0.5,
                markeredgewidth=2)
    if log_y:
        ax.set_yscale('log')
    # Remember the data-driven y limits so the guidelines cannot rescale the plot.
    ylim = ax.get_ylim()

    # add guide line if applicable
    if guidelines is not None:
        if not norm_on_center:
            logging.warning(
                'Can only add guidelines if peaks are normed on center, skip guidelines'
            )
        else:
            # assuming a fixed error rate per nt, iid on binom
            from scipy.stats import binom
            if isinstance(guidelines, (float, int)):
                # Wrap a single rate so len() and zip() below work on it.
                # (Previously the wrapped value went to an unused variable and
                # a scalar crashed on len(guidelines).)
                guidelines = [guidelines]
            if guideline_colors is None:
                guideline_colors = Presets.color_tab10(num=len(guidelines))

            dist_series = np.arange(max_dist + 1)
            for ix, (p, color) in enumerate(zip(guidelines, guideline_colors)):
                rv = binom(len(peak.center_seq), p)
                # pmf is vectorized over the whole distance grid.
                pmfs = rv.pmf(dist_series)
                # Normalize to distance 0 so the curve matches center-normed data.
                pmfs_normed = pmfs / pmfs[0]
                ax.plot(dist_series,
                        pmfs_normed,
                        color=color,
                        ls='--',
                        alpha=(ix + 1) / len(guidelines),
                        label=f'p = {p}')
    ax.set_ylim(ylim)
    y_label = ''
    if norm_on_center:
        y_label += ' normed'
    y_label += ' counts'
    ax.set_ylabel(y_label.title(), fontsize=14)
    ax.set_xlabel('Distance to peak center', fontsize=14)
    if not legend_off:
        ax.legend(loc=[1.02, 0], fontsize=9, frameon=False, ncol=legend_col)
    plt.tight_layout()

    if save_fig_to:
        fig = plt.gcf()
        # Transparent figure background in the saved file.
        fig.patch.set_facecolor('none')
        fig.patch.set_alpha(0)
        plt.savefig(save_fig_to, bbox_inches='tight', dpi=300)
    return ax
Пример #58
0
def bin_sf(cov, mc, p):
    """Return P(X > mc) for X ~ ``stats.binom(cov, p)``, or 0 unless cov > mc."""
    if not cov > mc:
        # With cov == mc the survival function is 0; the same value is
        # returned whenever cov is not greater than mc.
        return 0
    return stats.binom(cov, p).sf(mc)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 24 00:05:59 2017

@author: ericcacciavillani

"""

import scipy.stats as ss

# Total number of bets, probability of the event on each bet, and the
# largest number of successful bets to include in the sum.
n, p, max_sbets = 15, .4, 3

hh = ss.binom(n, p)

# Sum P(X = k) for k = 1 .. max_sbets. range() excludes its stop value,
# hence the + 1.
success_counts = range(1, max_sbets + 1)
total_p = 0
for k in success_counts:
    total_p = total_p + hh.pmf(k)

print(total_p)
Пример #60
0
plt.figure(figsize=(12, 8))
for i, lambda_ in enumerate([1, 2, 4, 6]):
    plt.plot(k, poisson.pmf(k, lambda_), '-o', label=lambda_, color=colors[i])
    plt.fill_between(k, poisson.pmf(k, lambda_), color=colors[i], alpha=0.5)
    plt.legend()
plt.title("Poisson distribution")
plt.ylabel("PDF at $k$")
plt.xlabel("$k$")
plt.show()

# Binomial

plt.figure(figsize=(12, 6))
k = np.arange(0, 22)
for p, color in zip([0.1, 0.3, 0.6, 0.8], colors):
    rv = binom(20, p)
    plt.plot(k, rv.pmf(k), lw=2, color=color, label=p)
    plt.fill_between(k, rv.pmf(k), color=color, alpha=0.5)
    plt.legend()
plt.title("Binomial distribution")
plt.tight_layout()
plt.ylabel("PDF at $k$")
plt.xlabel("$k$")
plt.show()

# Alpha

x = np.linspace(0.1, 2, 100)
alpha = scipy.stats.alpha
alphas = [0.5, 1, 2, 4]
plt.figure(figsize=(12, 6))