def chain(i, n, z, N, lam, p, q):
    """Simulate N steps of a +1/-1/0 random walk on the states 0..n.

    On every step a uniform number and a hypergeometric variate are drawn,
    an "up" and a "down" probability are computed from them, and the state
    moves by +1, -1 or stays put.  The states 0 and n are absorbing: once
    reached, the state never changes again, although the random draws are
    still made on every step.

    Args:
        i: Starting state.
        n: Largest state; also the first argument of ``hypergeom.rvs``
            (the population size in SciPy's parameterisation).
        z: Third argument of ``hypergeom.rvs`` (the number of draws).  The
            step probabilities divide by it.
        N: Number of steps to simulate.
        lam: Weight of the hypergeometric term; ``1 - lam`` weights the
            constant term.
        p: Factor of the constant term in the upward probability.
        q: Factor of the constant term in the downward probability.

    Returns:
        A list of length N with the state after each step.
    """
    path = []
    for _ in range(N):
        # Draw order is kept fixed (uniform first, then hypergeometric) so
        # that runs under a seeded global generator stay reproducible.
        u = np.random.uniform(0, 1)
        drawn = hypergeom.rvs(n, i, z)

        # hypergeom_pmf is defined elsewhere in this file; presumably it
        # evaluates the hypergeometric pmf -- verify against its definition.
        up = (
            lam * (hypergeom_pmf(n, i, z, drawn)) * (drawn / z) * ((n - i) / n)
            + (1 - lam) * p * ((n - i) / n)
        )
        down = (
            lam * (hypergeom_pmf(n, n - i, z, z - drawn)) * ((z - drawn) / z) * (i / n)
            + (1 - lam) * q * (i / n)
        )

        # Only interior states can move; 0 and n are absorbing.
        if i not in (0, n):
            if u <= up:
                i += 1
            elif up < u <= down + up:
                i -= 1

        path.append(i)
    return path
def _exactly_sample(rdd, num: int, seed: int):
    """Sample exactly ``num`` elements from ``rdd`` without replacement.

    The number of elements taken from each partition is drawn sequentially
    from hypergeometric distributions: for every split, the draw is made
    from what is still left of the population and of the sample budget.
    The per-split counts therefore always add up to ``num``.  The counts
    are then handed to ``_ReservoirSample``, whose ``func`` is applied to
    every partition.

    Args:
        rdd: Source RDD; it is traversed once to count each partition.
        num: Total number of elements to sample.
        seed: Seed forwarded to ``_ReservoirSample``.
            NOTE(review): the per-split counts come from SciPy's global
            random state and are not controlled by ``seed`` -- confirm
            that this is intended.

    Returns:
        The RDD returned by ``mapPartitionsWithIndex`` (partitioning
        preserved).

    Raises:
        ValueError: If ``num`` is negative or larger than the number of
            elements in ``rdd``.
    """
    split_size = rdd.mapPartitionsWithIndex(
        lambda s, it: [(s, sum(1 for _ in it))]
    ).collectAsMap()
    total = sum(split_size.values())

    if num < 0:
        raise ValueError(f"sample size must be non-negative, got {num}")
    if num > total:
        raise ValueError(
            f"not enough data to sample, own {total} but required {num}")

    # Randomly decide how many elements each split contributes.
    sampled_size = {}
    remaining = num
    for split, size in split_size.items():
        if size == 0 or remaining == 0:
            # Nothing can (or needs to) be taken here.  Skipping the draw
            # also avoids calling hypergeom.rvs with a zero population or
            # zero draws, e.g. for trailing empty partitions.
            taken = 0
        else:
            taken = int(hypergeom.rvs(M=total, n=size, N=remaining))
        sampled_size[split] = taken
        # Both the population and the sample budget shrink; without the
        # second update the counts would not sum to ``num``.
        total -= size
        remaining -= taken

    return rdd.mapPartitionsWithIndex(
        _ReservoirSample(split_sample_size=sampled_size, seed=seed).func,
        preservesPartitioning=True,
    )
def simulate_sketch(self, kmerSequenceLength, nMutated, sketchSize):
    """Simulate the size of the sketch intersection for one mutated sequence.

    Args:
        kmerSequenceLength: Number of kmers L in the sequence.
        nMutated: Number of mutated kmers N; must satisfy 0 <= N <= L.
        sketchSize: Sketch size s; must satisfy 0 < s <= L.

    Returns:
        The simulated size of the intersection of BS(A), BS(B) and
        BS(A union B): 0 when every kmer is mutated, otherwise one
        hypergeometric draw.

    Raises:
        ValueError: If sketchSize or nMutated is out of range.
    """
    L = kmerSequenceLength
    N = nMutated
    s = sketchSize

    if not (0 < s <= L):
        raise ValueError(
            "sketchSize must satisfy 0 < sketchSize <= kmerSequenceLength"
            " (got sketchSize=%r, kmerSequenceLength=%r)" % (s, L))
    if not (0 <= N <= L):
        # Previously this surfaced as an opaque "Domain error" from scipy.
        raise ValueError(
            "nMutated must satisfy 0 <= nMutated <= kmerSequenceLength"
            " (got nMutated=%r, kmerSequenceLength=%r)" % (N, L))

    # Use the generator registered for this sketch size, if any; None makes
    # scipy fall back to its global random state.
    prng = self.sketchPrngs[s] if (s in self.sketchPrngs) else None

    # Given sequence length L and N mutated kmers, we consider the kmers
    # in A union B to be numbered from 0 to L+N-1, and we consider the
    # *un*mutated kmers to be the first L-N of these
    #
    #    <---unmutated-->  <---mutated, in A-->  <---mutated, in B-->
    #   +----------------+--------------------+--------------------+
    #   | 0        L-1-N | L-N            L-1 | L            L+N-1 |
    #   +----------------+--------------------+--------------------+
    #
    # The L-N *un*mutated kmers are A intersection B. The hash function
    # would effectively choose a random set of s of all L+N kmers as bottom
    # sketch BS(A union B), where s is the sketch size. So conceptually, we
    # have an urn with L+N balls, s of which are 'red'. We draw L-N balls
    # and want to know how many are red. This is the size of the
    # intersection of BS(A), BS(B), and BS(A union B).

    if N == L:
        # Zero balls are drawn, so zero are red.  hypergeom.rvs does not
        # handle this (perfectly legitimate) case, hence the special case.
        return 0

    nIntersection = hypergeom.rvs(L + N, s, L - N, random_state=prng)
    return nIntersection
import matplotlib.pyplot as plt
import numpy as np  # required by np.arange below; was missing from this script
from scipy.stats import hypergeom

# Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
# we want to know the probability of finding a given number of dogs if we
# choose at random 12 of the 20 animals, we can initialize a frozen
# distribution and plot the probability mass function:

# M = population size, n = number of dogs, N = number of animals drawn.
[M, n, N] = [20, 7, 12]
rv = hypergeom(M, n, N)
x = np.arange(0, n + 1)  # every possible number of dogs: 0..n inclusive
pmf_dogs = rv.pmf(x)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, pmf_dogs, 'bo')
ax.vlines(x, 0, pmf_dogs, lw=2)
ax.set_xlabel('# of dogs in our group of chosen animals')
ax.set_ylabel('hypergeom PMF')
plt.show()

# Instead of using a frozen distribution we can also use `hypergeom`
# methods directly. For example, to obtain the cumulative distribution
# function, use:
prb = hypergeom.cdf(x, M, n, N)

# And to generate random numbers:
R = hypergeom.rvs(M, n, N, size=10)
# NOTE(review): c2 is built before this fragment; indexing it by position
# assumes the values 0, 1 and 2 were all observed -- confirm, or look the
# frequency up by value as is done for f3 below.
f2 = c2[3]
print(
    "Probabilidad de que haya exactamente 3 cargamentos que contengan \nal menos un dispositivo defectuoso de entre los 20 seleccionados en 10000000 simulaciones es:",
    f2)

# n = 5, N = 40, k = 3, x = 1
# (presumably the exercise's own notation; the SciPy arguments are below)
print("-----------------------------------------------------------------")
print("Ejercicio 2)\n")
Mvar = 40  # population size
nvar = 5   # number of marked (defective) items in the population
Nvar = 3   # number of items drawn
print("Con, M = 40, n = 5 y N = 3:")
variable = hypergeom.rvs(Mvar, nvar, Nvar, size=size)

# a3: distinct observed values, b3: their counts, c3: relative frequencies.
a3, b3 = np.unique(variable, return_counts=True)
c3 = b3 / size
# Look the frequency of the value 1 up by value, not by position: c3[1] is
# only P(X = 1) when 0 was also observed, and raises IndexError when fewer
# than two distinct values were sampled.
f3 = c3[a3 == 1].sum()
print(
    "Probabilidad de que se encuentre exactamente un componente defectuoso\ncon 10000000 de simulaciones:",
    f3)

print("-----------------------------------------------------------------")
print("Ejercicio 3)\n")
print("Con lambda = 1:")
z = poisson.rvs(1, size=size)
import matplotlib.pyplot as plt
from scipy.stats import hypergeom, rv_discrete
import numpy as np

numargs = hypergeom.numargs
# The shape parameters used by the unreachable section below were commented
# out by the author:
#[ M, n, N ] = [100, 10, -1]

# Display frozen pmf:
# NOTE(review): with SciPy's (M, n, N) order these arguments have n > M,
# which is outside the distribution's domain -- presumably a deliberate
# probe of invalid parameters; confirm.
rv = hypergeom(10, 20, 3)
print(rv.dist.b)  # function-call form works on Python 2 and 3
# Built-in min() of two scalars; np.min(a, 3) would treat 3 as the axis.
x = np.arange(0, min(rv.dist.b, 3) + 1)
h = plt.plot(x, rv.pmf(x))

# NOTE(review): the script stops here, so everything below is unreachable;
# it also uses M, n and N, which are never defined (see the commented-out
# assignment above).
exit()

# Check accuracy of cdf and ppf:
prb = hypergeom.cdf(x, M, n, N)
h = plt.semilogy(np.abs(x - hypergeom.ppf(prb, M, n, N)) + 1e-20)

# Random number generation:
R = hypergeom.rvs(M, n, N, size=100)

# Custom made discrete distribution:
vals = [np.arange(7), (0.1, 0.2, 0.3, 0.1, 0.1, 0.1, 0.1)]
custm = rv_discrete(name='custm', values=vals)
h = plt.plot(vals[0], custm.pmf(vals[0]))
# Dashed vertical lines at q1, median and q3 (all computed earlier).
for _boundary in (q1, median, q3):
    ax.axvline(x=_boundary, linewidth=3, alpha=0.6, color='black',
               linestyle='dashed')

horiz_text_offset = 0.4
vert_text_offset = 0.1
plt.xlim(0, 21)

# Put a 'Q1'..'Q4' label at the midpoint of each interval between
# consecutive boundaries, shifted left by horiz_text_offset.
_edges = (x[0], q1, median, q3, x[-1])
for _label, _lo, _hi in zip(('Q1', 'Q2', 'Q3', 'Q4'), _edges[:-1], _edges[1:]):
    plt.text(_lo + (_hi - _lo) / 2.0 - horiz_text_offset,
             vert_text_offset,
             _label,
             color='black',
             size='x-large')

# Random samples
samp_size = 100
pts = hypergeom.rvs(M, n, N, size=samp_size)

# Histogram of the sampled points; the weights make the bar heights sum
# to 1 (relative frequencies) while density stays off.
plt.hist(
    pts,
    bins=10,
    facecolor='purple',
    alpha=0.45,
    weights=np.ones_like(pts) / float(len(pts)),
    density=False,
    edgecolor='black',
    linewidth=0.5,
)

# Mark every individual sample with a blue 'x' at a fixed height.
ys = [0.005] * samp_size
plt.plot(pts, ys, 'bx')
plt.show()

# Sample statistics
std_sample, var_sample, mean_sample = np.std(pts), np.var(pts), np.mean(pts)
q1_sample, median_sample, q3_sample = np.percentile(pts, [25, 50, 75])
# Shrink the tick labels of the current axes.  Tick.label was removed in
# Matplotlib 3.8; label1 is the same text object and exists in all versions.
for tick in ax.xaxis.get_major_ticks():
    tick.label1.set_fontsize(5)
for tick in ax.yaxis.get_major_ticks():
    tick.label1.set_fontsize(5)
fig.suptitle('Distribucion de Poisson')
plt.show()

# HYPERGEOMETRIC DISTRIBUTION
from scipy.stats import hypergeom

# Bare expressions: their values are only shown in an interactive session.
hypergeom.pmf(1, M=15 + 10, n=15, N=3)
hypergeom.cdf(1, M=15 + 10, n=15, N=3)
1 - hypergeom.cdf(1, M=15 + 10, n=15, N=3)
hypergeom.rvs(M=15 + 10, n=15, N=3, size=100)

[M, n, N] = [20, 7, 12]
# The support is max(0, N - M + n) <= k <= min(n, N), both ends included;
# np.arange excludes its stop value, hence the + 1.
x = np.arange(max(0, N - M + n), min(n, N) + 1)
pmf_values = hypergeom.pmf(x, M, n, N)
cdf_values = hypergeom.cdf(x, M, n, N)

fig = plt.figure(figsize=(5, 2.7))

# Left panel: probability mass function.
ax = fig.add_subplot(1, 2, 1)
ax.plot(x, pmf_values, 'bo', ms=5, label='hypergeom pmf')
ax.vlines(x, 0, pmf_values, colors='b', lw=2, alpha=0.5)
ax.set_ylim([0, max(pmf_values) * 1.1])
for tick in ax.xaxis.get_major_ticks():
    tick.label1.set_fontsize(5)
for tick in ax.yaxis.get_major_ticks():
    tick.label1.set_fontsize(5)

# Right panel: cumulative distribution function.
ax = fig.add_subplot(1, 2, 2)
ax.plot(x, cdf_values, 'bo', ms=5, label='hypergeom cdf')
ax.vlines(x, 0, cdf_values, colors='b', lw=2, alpha=0.5)