def zernike(self, G, N):
    """Build the complex array ``Z[n, l, m]`` of order ``N`` from ``G``.

    The result is obtained through a chain of intermediate arrays
    (V -> W -> X -> Y -> Z); each stage is a weighted sum over entries of
    the previous one.
    NOTE(review): judging by the name this presumably converts geometric
    moments ``G`` into 3D Zernike moments -- confirm against the caller.

    Parameters
    ----------
    G : array indexable as ``G[i, j, k]``
        Input coefficients read by the first stage.
    N : int
        Maximum order; every intermediate array has shape
        ``(N + 1, N + 1, N + 1)``.

    Returns
    -------
    numpy.ndarray
        Complex array of shape ``(N + 1, N + 1, N + 1)``.  Only entries with
        ``0 <= m <= l <= n <= N`` are written; all others stay zero.

    Notes
    -----
    ``nest`` is defined elsewhere; it is assumed to yield index tuples where
    each lambda supplies the range of the next index given the previous
    ones -- TODO confirm.
    """
    V = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
    for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                               lambda _a: range(N - 2 * _a + 1),
                               lambda _a, _b: range(N - 2 * _a - _b + 1),
                               lambda _a, _b, _c: range(_a + _c + 1),
                               ):
        V[a, b, c] += np.power(IMAG_CONST, alpha) * \
            nchoosek(a + c, alpha) * G[2 * a + c - alpha, alpha, b]

    W = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
    for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                               lambda _a: range(N - 2 * _a + 1),
                               lambda _a, _b: range(N - 2 * _a - _b + 1),
                               lambda _a, _b, _c: range(_a + 1),
                               ):
        W[a, b, c] += np.power(-1, alpha) * np.power(2, a - alpha) * \
            nchoosek(a, alpha) * V[a - alpha, b, c + 2 * alpha]

    X = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
    for a, b, c, alpha in nest(lambda: range(int(N / 2) + 1),
                               lambda _a: range(N - 2 * _a + 1),
                               lambda _a, _b: range(N - 2 * _a - _b + 1),
                               lambda _a, _b, _c: range(_a + 1),
                               ):
        X[a, b, c] += nchoosek(a, alpha) * W[a - alpha, b + 2 * alpha, c]

    Y = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
    for l, nu, m, j in nest(lambda: range(N + 1),
                            lambda _l: range(int((N - _l) / 2) + 1),
                            lambda _l, _nu: range(_l + 1),
                            lambda _l, _nu, _m: range(int((_l - _m) / 2) + 1),
                            ):
        Y[l, nu, m] += self.Yljm(l, j, m) * X[nu + j, l - m - 2 * j, m]

    Z = np.zeros([N + 1, N + 1, N + 1], dtype=complex)
    # NOTE (from the original author): there's an "if ... mod" missing in
    # the ``l`` range below (no parity filter on n - l) but it still works?
    for n, l, m, nu, in nest(lambda: range(N + 1),
                             lambda _n: range(_n + 1),
                             lambda _n, _l: range(_l + 1),
                             lambda _n, _l, _m: range(int((_n - _l) / 2) + 1),
                             ):
        # integer required for k when used as power in Qklnu below:
        k = int((n - l) / 2)
        Z[n, l, m] += (3 / (4 * PI_CONST)) * \
            self.Qklnu(k, l, nu) * np.conj(Y[l, nu, m])

    # Sign/conjugation pass over every written entry (m <= l <= n).
    # The range lambdas must use their own arguments (_n, _l): referring to
    # the loop variables ``n``/``l`` makes the bounds depend on whatever
    # value those names hold when ``nest`` happens to evaluate the lambda.
    for n, l, m in nest(lambda: range(N + 1),
                        lambda _n: range(_n + 1),
                        lambda _n, _l: range(_l + 1),
                        ):
        if np.mod(np.sum([n, l, m]), 2) == 0:
            # even n + l + m: complex conjugate
            Z[n, l, m] = np.real(
                Z[n, l, m]) - np.imag(Z[n, l, m]) * IMAG_CONST
        else:
            # odd n + l + m: negated complex conjugate
            Z[n, l, m] = -np.real(Z[n, l, m]) + \
                np.imag(Z[n, l, m]) * IMAG_CONST

    return Z
def Qklnu(self, k, l, nu):
    """Return the scalar coefficient ``Q(k, l, nu)``.

    Computed as ``(-1)**(k + nu) / 4**k * sqrt((2l + 4k + 3) / 3)
    * trinomial(nu, k - nu, l + nu + 1) * C(2(l + nu + 1 + k), l + nu + 1 + k)
    / C(2(l + nu + 1), l + nu + 1)``.

    Parameters
    ----------
    k, l, nu : int
        Indices of the coefficient; ``k`` is used as an integer power.

    Returns
    -------
    float
    """
    # ``np.float`` was a plain alias of the builtin and no longer exists in
    # NumPy >= 1.24, so use ``float`` directly.
    aux_1 = np.power(-1, k + nu) / float(np.power(4, k))
    aux_2 = np.sqrt((2 * l + 4 * k + 3) / 3.0)
    aux_3 = self.trinomial(
        nu, k - nu, l + nu + 1) * nchoosek(2 * (l + nu + 1 + k), l + nu + 1 + k)
    aux_4 = nchoosek(2.0 * (l + nu + 1), l + nu + 1)
    return (aux_1 * aux_2 * aux_3) / aux_4
def _get_minion_probability(self, barcode, e_s=0.051, e_i=0.049, e_d=0.078):
    """Store MinION error-model probabilities for ``barcode``.

    Fills ``self.minionprobs[barcode]`` with the keys ``params``,
    ``p_minion_aln``, ``p_minion_un``, ``p_minion``, ``norm_p_minion_aln``
    and ``norm_p_minion``, computed from ``self.counts[barcode]``.

    ``e_s``, ``e_i`` and ``e_d`` are the substitution, insertion and
    deletion error rates; the match probability is ``1 - e_i - e_d - e_s``.
    """
    ## Since the read is the "ref" here, the counted dels/ins are dels/ins
    ## from/in the read.  The barcode is the real "reference" when talking
    ## about deletions and insertions in the MinION read, so the roles are
    ## swapped: we need e_i**n_d and e_d**n_i rather than e_i**n_i and
    ## e_d**n_d.
    ## NOTE2: p_minion is not necessarily comparable when barcodes have
    ## different lengths - could divide by barcode_len or q*r, or multiply
    ## by a multinomial.
    self._aln_string_check(barcode)

    tally = self.counts[barcode]
    n_match = tally['m']
    n_sub = tally['mm']
    n_ins = tally['i']
    n_del = tally['d']
    n_unaligned = tally['u']

    p_match = 1 - e_i - e_d - e_s
    rate_total = e_s + e_i + e_d
    share_s = e_s / rate_total
    share_i = e_i / rate_total
    share_d = e_d / rate_total

    # Multinomial coefficient: choose positions for each event type in turn.
    remaining = tally['alnlen']
    arrangements = nchoosek(remaining, n_match)
    remaining = remaining - n_match
    arrangements = arrangements * nchoosek(remaining, n_sub)
    remaining = remaining - n_sub
    arrangements = arrangements * nchoosek(remaining, n_ins)
    remaining = remaining - n_ins
    arrangements = arrangements * nchoosek(remaining, n_del)

    ## i and d are deliberately switched here (see note above)
    p_aligned = (p_match**n_match) * (e_s**n_sub) * (e_d**n_ins) * (e_i**n_del)
    ## it is unclear whether the unaligned bases were subs/dels/ins, so all
    ## three rates are used, each weighted by its share of the total rate
    p_unaligned = (e_s**(share_s * n_unaligned)) * \
        (e_i**(share_i * n_unaligned)) * (e_d**(share_d * n_unaligned))
    norm_p_aligned = arrangements * p_aligned

    ## There seems to be no reason to multiply the unaligned part by an
    ## nchoosek: those bases are fixed at the ends and correspond to some
    ## "single" unknown composite error.
    self.minionprobs[barcode] = {
        'params': [e_s, e_i, e_d],
        'p_minion_aln': p_aligned,
        'p_minion_un': p_unaligned,
        'p_minion': p_aligned * p_unaligned,
        'norm_p_minion_aln': norm_p_aligned,
        'norm_p_minion': norm_p_aligned * p_unaligned,
    }
def func4_matrix_diag(beta, k, V):
    """Build diagonal matrices of binomial terms in ``beta`` and their derivatives.

    The diagonal starts with the single entry ``beta**k``; for each
    ``j = 1..k`` a run of ``j + 1`` copies of ``C(k, k-j) * beta**(k-j)`` is
    prepended.  The derivative diagonal holds the ``beta``-derivative of each
    entry in the same layout.

    Parameters
    ----------
    beta : scalar
    k : int
    V : unused (kept for interface compatibility)

    Returns
    -------
    D, U, dD, dU : numpy.ndarray
        ``D``/``dD`` are the full diagonal matrices; ``U``/``dU`` are float
        copies with the last row and column dropped.
    """
    values = beta**k
    derivs = k * beta**(k - 1)
    for step in range(1, k + 1):
        power = k - step
        run = step + 1
        value_run = numpy.ones((run, )) * nchoosek(k, power) * (beta**power)
        values = numpy.r_[value_run, values]
        if power == 0:
            # derivative of the constant term
            deriv_run = numpy.zeros((run, ))
        else:
            deriv_run = numpy.ones(
                (run, )) * nchoosek(k, power) * power * (beta**(power - 1))
        derivs = numpy.r_[deriv_run, derivs]

    D = numpy.diag(values)
    dD = numpy.diag(derivs)

    trimmed = len(values) - 1
    U = numpy.zeros([trimmed, trimmed])
    U[:, :] = D[:trimmed, :trimmed]
    dU = numpy.zeros([trimmed, trimmed])
    dU[:, :] = dD[:trimmed, :trimmed]
    return D, U, dD, dU
def equidistant_barycentric_weights(n):
    """Return ``n`` weights with alternating sign and magnitude ``C(n-1, i)``.

    Even positions are positive, odd positions negative.  For odd ``n`` the
    final weight is set to 1 directly.
    """
    weights = np.zeros(n, np.double)
    paired = n - n % 2  # largest even count not exceeding n
    for idx in range(paired):
        sign = -1. if idx % 2 else 1.
        weights[idx] = sign * nchoosek(n - 1, idx)
    if n % 2 == 1:
        weights[n - 1] = 1.
    return weights
def get_nonseparable_basis(n, L):
    """Evaluate the degree-``n`` two-variable basis on an ``(L+1) x (L+1)`` grid.

    For every ``i, j >= 0`` with ``i + j <= n`` the term
    ``C(n, i) * C(n-i, j) * x**i * y**j * (L - x - y)**(n-i-j) / L**n`` is
    evaluated on the mesh of ``L + 1`` equally spaced points in ``[0, L]``
    along each axis.

    Returns
    -------
    list of numpy.ndarray
        One 2-D array per ``(i, j)`` pair, ordered by ``i`` then ``j``.
    """
    axis = np.linspace(0, L, L + 1)
    x, y = np.meshgrid(axis, axis)
    return [
        nchoosek(n, i) * nchoosek(n - i, j) * x**i * y**j *
        (L - x - y)**(n - i - j) / L**n
        for i in range(n + 1)
        for j in range(n - i + 1)
    ]
def find_most_distant(self, input_sample, num_samples, num_params, k_choices,
                      num_groups=None):
    """Score every ``k_choices``-subset of the ``num_samples`` trajectories.

    Parameters
    ----------
    input_sample : numpy.ndarray
        The trajectories to choose from.
    num_samples : int
        Number of trajectories contained in ``input_sample``.
    num_params : int
        Number of parameters.
    k_choices : int
        Number of trajectories per combination.
    num_groups : int, default=None
        Number of groups.

    Returns
    -------
    numpy.ndarray
        float32 scores, one per combination, in the order produced by
        ``itertools.combinations``.

    Raises
    ------
    ValueError
        If the number of combinations is ``sys.maxsize`` or more.
    """
    total = nchoosek(num_samples, k_choices)
    if total >= sys.maxsize:
        raise ValueError("Number of combinations is too large")
    number_of_combinations = int(total)

    # Distances for every pairing of trajectories, computed once up front.
    distance_matrix = self.compute_distance_matrix(input_sample,
                                                   num_samples,
                                                   num_params,
                                                   num_groups)

    # Work through the combinations in batches of at most one million.
    chunk = min(int(1e6), number_of_combinations)

    combo_gen = combinations(range(num_samples), k_choices)
    scores = np.zeros(number_of_combinations, dtype=np.float32)
    # Index pairs within one combination; generated once and reused.
    pairwise = np.array(list(combinations(range(k_choices), 2)))

    mappable = self.mappable
    for batch_no, combos in enumerate(self.grouper(chunk, combo_gen)):
        start = batch_no * chunk
        scores[start:start + chunk] = mappable(combos, pairwise,
                                               distance_matrix)
    return scores
def _get_binomial_probability(self, barcode, e_s=0.051, e_i=0.049, e_d=0.078):
    """Store three binomial match probabilities for ``barcode``.

    Reads ``self.counts[barcode]`` and writes
    ``self.binomprobs[barcode] = [prob1, prob2, prob3, p_m]`` where ``p_m``
    is the per-base match probability ``1 - e_i - e_d - e_s`` and

    * prob1: matches among the alignment columns,
    * prob2: matches among alignment columns plus unaligned barcode bases,
    * prob3: matches among all ``queryLen`` barcode bases.
    """
    tally = self.counts[barcode]
    p_m = 1 - e_i - e_d - e_s
    n_m = tally['m']
    # mismatches, insertions and deletions all count as "not a match"
    n_err = tally['mm'] + tally['i'] + tally['d']
    n_u = tally['u']
    aln_len = tally['alnlen']
    query_len = tally['queryLen']

    in_alignment = nchoosek(aln_len, n_m) * (p_m**n_m) * ((1 - p_m)**(n_err))
    with_unaligned = nchoosek(aln_len + n_u, n_m) * \
        (p_m**n_m) * ((1 - p_m)**(n_err + n_u))
    in_barcode = nchoosek(query_len, n_m) * \
        (p_m**n_m) * ((1 - p_m)**(query_len - n_m))

    self.binomprobs[barcode] = [in_alignment, with_unaligned, in_barcode, p_m]
def find_most_distant(self, input_sample, num_samples, num_params, k_choices,
                      num_groups=None):
    """Score every ``k_choices``-subset of the ``num_samples`` trajectories.

    Parameters
    ----------
    input_sample : numpy.ndarray
        The trajectories to choose from.
    num_samples : int
        Number of trajectories contained in ``input_sample``.
    num_params : int
        Number of parameters.
    k_choices : int
        Number of trajectories per combination.
    num_groups : int, default=None
        Number of groups.

    Returns
    -------
    numpy.ndarray
        float32 scores, one per combination, in the order produced by
        ``itertools.combinations``.

    Raises
    ------
    ValueError
        If the number of combinations is ``sys.maxsize`` or more.
    """
    total = nchoosek(num_samples, k_choices)
    if total >= sys.maxsize:
        raise ValueError("Number of combinations is too large")
    number_of_combinations = int(total)

    # Distances for every pairing of trajectories, computed once up front.
    distance_matrix = self.compute_distance_matrix(input_sample,
                                                   num_samples,
                                                   num_params,
                                                   num_groups)

    # Work through the combinations in batches of at most one million.
    chunk = min(int(1e6), number_of_combinations)

    combo_gen = combinations(list(range(num_samples)), k_choices)
    scores = np.zeros(number_of_combinations, dtype=np.float32)
    # Index pairs within one combination; generated once and reused.
    pairwise = np.array(list(combinations(list(range(k_choices)), 2)))

    for batch_no, combos in enumerate(self.grouper(chunk, combo_gen)):
        start = batch_no * chunk
        scores[start:start + chunk] = self.mappable(combos, pairwise,
                                                    distance_matrix)
    return scores
def Yljm(self, l, j, m):
    """Return the scalar coefficient ``Y(l, j, m)``.

    Computed as ``(-1)**j * sqrt(2l + 1) / 2**l
    * trinomial(m, j, l - m - 2j) * C(2(l - j), l - j)
    / sqrt(trinomial(m, m, l - m))``.
    """
    prefactor = np.power(-1, j) * (np.sqrt(2 * l + 1) / np.power(2, l))
    combinatorial = self.trinomial(m, j, l - m - 2 * j) * \
        nchoosek(2 * (l - j), l - j)
    normaliser = np.sqrt(self.trinomial(m, m, l - m))
    return (prefactor * combinatorial) / normaliser
def amb6(d, exponent=2.2):
    """Average of the binomial(d, 0.5) probabilities raised to ``exponent``.

    Weighted Bernoulli trials: for each ``k`` in ``0..d`` the probability of
    exactly ``k`` successes out of ``d`` is computed, raised to ``exponent``
    and the mean over all ``k`` is returned.

    ``d`` is the node's degree.  Returns 0 for ``d <= 0``; degrees above 250
    are treated as 250.
    """
    if d <= 0:
        return 0
    if d > 250:
        d = 250

    # a degree-dependent bias (+ math.log(d,2)/d) was tried and disabled
    p = .5
    q = 1 - p

    powered = []
    total = 0
    for k in range(d + 1):
        prob = nchoosek(d, k, exact=True) * p**k * q**(d - k)
        powered.append(prob**exponent)
        total += prob
    # the probabilities are handed to the external checker as a single sum
    verify(total)
    return np.average(powered)
def amb7(d, n2e, e2n):
    """Average of ``n2e * prob**(e2n * log10(d))`` over binomial(d, 0.5).

    Weighted Bernoulli trials: ``prob`` is the probability of exactly ``k``
    successes out of ``d`` for each ``k`` in ``0..d``.

    Returns 0 for ``d <= 0``; degrees above 250 are treated as 250.
    """
    if d <= 0:
        return 0
    if d > 250:
        d = 250

    # a degree-dependent bias (+ math.log(d,2)/d) was tried and disabled
    p = .5
    q = 1 - p
    power = e2n * log10(d)

    scaled = []
    total = 0
    for k in range(d + 1):
        prob = nchoosek(d, k, exact=True) * p**k * q**(d - k)
        scaled.append(n2e * (prob**power))
        total += prob
    # the probabilities are handed to the external checker as a single sum
    verify(total)
    return np.average(scaled)
def amb8(d, exponent):
    """Average of ``1 / (prob**impact * d**2)`` over binomial(d, 0.5).

    Weighted Bernoulli trials: for each ``k`` in ``0..d``, ``prob`` is the
    probability of exactly ``k`` successes out of ``d`` and ``impact`` is
    ``k / d`` folded around one half (``1 - k/d`` when ``k > d/2``).

    Parameters
    ----------
    d : int
        Node degree.
    exponent : unused (kept for interface compatibility)

    Returns
    -------
    float, or 0 when ``d <= 0``.

    Prints a one-line summary before returning.
    """
    # Same guard as amb6/amb7: without it d == 0 divides by zero in k / d.
    if d <= 0:
        return 0

    universe = d**2
    # a degree-dependent bias (+ math.log(d,2)/d) was tried and disabled
    prob_beneficial = .5
    prob_detrimental = 1 - prob_beneficial
    p = prob_beneficial
    q = prob_detrimental

    ambiguity = []
    unity = 0
    for k in range(0, d + 1, 1):
        dCk = nchoosek(d, k, exact=True)
        count = dCk * p**k * q**(d - k)
        impact = k / d
        if k > d / 2:
            impact = 1 - impact
        ambiguity.append(1 / ((count**impact) * universe))
        unity += count
    print('\td:' + l(d) + ' ambiguity: ' + r(np.average(ambiguity)) +
          '\t 1/d**2: ' + str(1 / (d**2)))
    # the probabilities are handed to the external checker as a single sum
    verify(unity)
    return np.average(ambiguity)
def get_total_degree(num_dims, num_pts):
    """Return the smallest degree >= 1 whose term count reaches ``num_pts``.

    The number of terms for a degree is ``C(num_dims + degree, degree)``,
    rounded to the nearest integer.
    """
    degree = 1
    while int(round(nchoosek(num_dims + degree, degree))) < num_pts:
        degree += 1
    return degree
def MMLSCurvatureTensor2D3D(self, U, PX0, X0, W, deg=2):
    '''Assemble a symmetric matrix from the second-order fit coefficients.

    The function assumes we are dealing with a 2d surface in R^3.

    The normal is the cross product of the first two columns of ``U``;
    ``X0`` is projected onto it and fitted by ``self.weightedLeastSquares``.
    Every coefficient whose exponent tuple sums to 2 is then placed in the
    output: a mixed term contributes ``c / 2`` to both off-diagonal
    positions, a pure square contributes ``c`` on the diagonal.

    NOTE(review): ``deg`` is accepted but never used; the fit is run with
    ``self.poly_deg``.

    Returns
    -------
    numpy.ndarray
        Square matrix of side ``C(self.manifold_dim, 2) + 1``.
    '''
    Normal = np.cross(U[:, 0], U[:, 1])
    y_data = np.dot(Normal.T, X0)
    coeffs, Base = self.weightedLeastSquares(PX0, W, y_data, self.poly_deg)

    # ``np.int`` was an alias of the builtin and no longer exists in
    # NumPy >= 1.24.
    size = int(nchoosek(self.manifold_dim, 2) + 1)
    Curv_tensor = np.zeros((size, size))
    for c, b in zip(coeffs.T[0], Base):
        if sum(b) == 2:
            # positions of the variables that appear in this term
            indices = np.arange(len(b))[np.array(b, dtype=bool)]
            if len(indices) > 1:
                Curv_tensor[indices[0], indices[1]] = c / 2
                Curv_tensor[indices[1], indices[0]] = c / 2
            else:
                Curv_tensor[indices[0], indices[0]] = c
    return Curv_tensor
def binomial(self, n, k):
    """Return ``nchoosek(n, k)``, memoised in ``self.binomial_dict``."""
    cache = self.binomial_dict
    key = (n, k)
    if key not in cache:
        cache[key] = nchoosek(n, k)
    return cache[key]
def calculateSigmaFromPoint(self, point):
    '''Set ``self.sigma`` to a percentile of the distances from ``point``.

    Calculates an approximate distance for the weight function.
    This is a very naive implementation!!!

    The percentile corresponds to
    ``sparse_factor * C(manifold_dim + poly_deg, manifold_dim) + 1``
    points (capped at the number of data points) out of all columns of
    ``self.data``.

    Parameters
    ----------
    point : numpy.ndarray
        Point to measure from; squeezed to one value per row of
        ``self.data``.
    '''
    point = point.squeeze()
    N = self.data.shape[1]
    N_THRESH = min([(self.sparse_factor * nchoosek(
        self.manifold_dim + self.poly_deg, self.manifold_dim)) + 1, N])
    # ``np.float`` was an alias of the builtin and no longer exists in
    # NumPy >= 1.24.
    N_PERC = 100 * float(N_THRESH) / N
    # Broadcasting the point as a column subtracts it from every data
    # column without materialising a tiled copy.
    DISTS = np.linalg.norm(self.data - np.reshape(point, (-1, 1)), axis=0)
    self.sigma = np.percentile(DISTS, N_PERC)
def compute_combinations(num_vars, level):
    """Collect the index vectors generated for ``level`` as columns.

    For ``level > 0`` the expected number of columns is
    ``C(num_vars + level, num_vars) - C(num_vars + level - 1, num_vars)`` and
    they are produced one at a time by ``compute_next_combination`` until it
    reports that there is nothing left to extend.  Otherwise a single
    all-zero column is returned.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(num_vars, num_indices)``.
    """
    if not level > 0:
        return np.zeros((num_vars, 1), dtype=int)

    # Coerce to a Python int: ``nchoosek`` may return a float, which is not
    # a valid array dimension.
    num_indices = int(round(
        nchoosek(num_vars + level, num_vars) -
        nchoosek(num_vars + level - 1, num_vars)))
    indices = np.empty((num_vars, num_indices), dtype=int)

    extend = False
    h = 0
    t = 0
    column = 0
    # important this is initialized to zero
    index = np.zeros((num_vars), dtype=int)
    while True:
        index, extend, h, t = compute_next_combination(
            num_vars, level, extend, h, t, index)
        indices[:, column] = index
        column += 1
        if not extend:
            break
    return indices
def calculateSigma(self, n_iter=100):
    '''Set ``self.sigma`` from distance percentiles at random data points.

    Calculates an approximate distance for the weight function.
    This is a very naive implementation!!!

    ``n_iter`` columns of ``self.data`` are drawn at random (indices in
    ``[3, N - 3)``, so at least 7 columns are required).  For each, the
    percentile of its distances to all columns is taken, where the
    percentile corresponds to
    ``sparse_factor * C(manifold_dim + poly_deg, manifold_dim) + 1`` points
    out of ``N`` (clipped to ``[0, 100]``).  ``self.sigma`` is the largest
    of these values.
    '''
    N = self.data.shape[1]
    N_THRESH = (self.sparse_factor * nchoosek(
        self.manifold_dim + self.poly_deg, self.manifold_dim)) + 1
    # ``np.float`` was an alias of the builtin and no longer exists in
    # NumPy >= 1.24.
    N_PERC = max(min(100 * float(N_THRESH) / N, 100), 0)

    sig_approximation = np.zeros(n_iter)
    for i, r_index in enumerate(np.random.randint(3, N - 3, n_iter)):
        q = np.asarray(self.data[:, r_index])
        # Broadcasting q as a column subtracts it from every data column
        # without materialising a tiled copy.
        DISTS = np.linalg.norm(self.data - q.reshape(-1, 1), axis=0)
        sig_approximation[i] = np.percentile(DISTS, N_PERC)
    self.sigma = np.max(sig_approximation)
def matrix_A(k, th):
    """Return the matrix of binomial terms ``C(k, j) cos^(k-j) sin^j`` per angle.

    Parameters
    ----------
    k : int
        Order; the result has ``k + 1`` columns.
    th : sequence of angles
        One row is produced per entry.

    Returns
    -------
    numpy.ndarray
        Shape ``(len(th), k + 1)``; a single column of ones when ``k == 0``.
    """
    V = len(th)
    if k == 0:
        return numpy.ones((V, 1))

    cos_th = numpy.cos(th)
    sin_th = numpy.sin(th)
    A = numpy.zeros((V, k + 1))
    for j in range(k + 1):
        column = (cos_th**(k - j)) * (sin_th**(j)) * nchoosek(k, j)
        # flatten to one value per angle before storing
        A[:, j] = column.reshape(V)
    return A
def amb5(d):
    """Cube of the binomial(d, 0.5) probability of ``ceil(log2(d))`` successes.

    Bernoulli trials.  ``d`` is the node's degree; ``k1 = ceil(log2(d))`` is
    how many edges must be ONES or ZEROS for the node to be considered
    ambiguous (the smaller ``k1``, the stricter the criterion).

    Prints ``d``, ``k1`` and the probability before returning.
    """
    # a degree-dependent bias (+ math.log(d,2)/d) was tried and disabled
    p_good = .5
    p_bad = 1 - p_good

    k1 = math.ceil(math.log2(d))
    # equivalently fact(d) / (fact(k1) * fact(d - k1))
    ways = nchoosek(d, k1, exact=True)
    amb1 = ways * p_good**k1 * p_bad**(d - k1)

    print('d:' + l(d) + 'k:' + l(k1) + 'amb1:' + r(amb1))
    return amb1**3
def get_coefficients_for_plotting(pce, qoi_idx):
    """Order the coefficients of one quantity of interest by total degree.

    For every degree from 0 up to the largest column sum of ``pce.indices``
    the level indices of that degree are generated and, walking them from
    last to first, the matching coefficient is appended (0.0 when the
    index is not present in ``pce.indices``).

    Parameters
    ----------
    pce : object providing ``get_coefficients()``, ``indices`` and
        ``num_vars()``
    qoi_idx : int
        Column of the coefficient matrix to extract.

    Returns
    -------
    coeff_sorted : numpy.ndarray
        Coefficients ordered degree by degree.
    degree_indices_set : numpy.ndarray
        Array with ``num_vars`` rows accumulated once per processed degree.
    degree_breaks : list of int
        Running count of coefficients at the end of each degree.

    Warns
    -----
    UserWarning
        If a degree has 1e6 or more terms; processing stops at that degree.
    """
    import warnings

    coeff = pce.get_coefficients()[:, qoi_idx]
    indices = pce.indices.copy()
    assert coeff.shape[0] == indices.shape[1]

    num_vars = pce.num_vars()
    max_degree = indices.sum(axis=0).max()

    # Map each index column (hashed) to its position in ``coeff``.
    indices_dict = {hash_array(indices[:, ii]): ii
                    for ii in range(indices.shape[1])}

    i = 0
    degree_breaks = []
    coeff_sorted = []
    degree_indices_set = np.empty((num_vars, 0))
    for degree in range(max_degree + 1):
        nterms = nchoosek(num_vars + degree, degree)
        if nterms >= 1e6:
            # This message used to be a bare string expression and was
            # silently discarded.
            warnings.warn(
                'Could not plot coefficients of terms with degree >= %d'
                % degree)
            break
        degree_indices = compute_hyperbolic_level_indices(
            num_vars, degree, 1.)
        # NOTE(review): this stacks the full ``indices`` array once per
        # degree; ``degree_indices`` looks like the intended operand.
        # Left unchanged pending confirmation of how callers use it.
        degree_indices_set = np.hstack((degree_indices_set, indices))
        for ii in range(degree_indices.shape[1] - 1, -1, -1):
            key = hash_array(degree_indices[:, ii])
            if key in indices_dict:
                coeff_sorted.append(coeff[indices_dict[key]])
            else:
                coeff_sorted.append(0.0)
            i += 1
        degree_breaks.append(i)

    return np.array(coeff_sorted), degree_indices_set, degree_breaks
def num_total_degree_indices(num_vars, degree):
    """Return ``C(num_vars + degree, num_vars)``."""
    return nchoosek(num_vars + degree, num_vars)
def get_formatted_pairwise_alignment(alignment, blocksize=100, e_s=0.051, e_i=0.049, e_d=0.078, report_prob=False, report_evalue=False, with_unaligned_portion=False):
    """Render a pairwise alignment and its statistics as a multi-line string.

    The CIGAR operations of ``alignment`` ('M', 'D', 'I') are replayed to
    build gapped reference/query strings and a match track ('|' for equal
    bases, case-insensitive).  The report contains, in order: the query
    name, reference and query coordinate lines, match/mismatch/indel
    statistics, an e-value line, two MinION error-model lines (the second
    with the insertion/deletion exponents swapped), three binomial
    probability lines, optionally the marginalized probability, and finally
    the alignment in blocks of ``blocksize`` columns.

    ``e_s``, ``e_i``, ``e_d`` are the substitution/insertion/deletion error
    rates found in early MinION sequencing by Jain et al ("Improved data
    analysis for the MinION nanopore sequencer"); the match probability is
    ``1 - e_i - e_d - e_s``.

    ``report_prob``: pass the probability to have it included in the report.
    ``report_evalue`` and ``with_unaligned_portion`` are currently unused.
    """
    ## TODO: printout unaligned portion as part of alignment viz
    ref_seq = alignment.orig_ref
    query_seq = alignment.orig_query

    n_m = n_mm = n_d = n_i = 0
    ref_parts = []
    query_parts = []
    stick_parts = []
    r_i = alignment.r_pos
    q_i = alignment.q_pos
    for step in alignment.cigar:
        op = step[1]
        if op == 'M':
            span = step[0]
            ref_chunk = ref_seq[r_i:r_i + span]
            query_chunk = query_seq[q_i:q_i + span]
            ref_parts.append(ref_chunk)
            query_parts.append(query_chunk)
            r_i += span
            q_i += span
            for pos in range(len(ref_chunk)):
                if ref_chunk[pos].upper() == query_chunk[pos].upper():
                    stick_parts.append('|')
                    n_m += 1
                else:
                    stick_parts.append(' ')
                    n_mm += 1
        elif op == 'D':
            span = step[0]
            ref_parts.append(ref_seq[r_i:r_i + span])
            query_parts.append('-' * span)
            stick_parts.append(' ' * span)
            r_i += span
            n_d += span
        elif op == 'I':
            span = step[0]
            ref_parts.append('-' * span)
            query_parts.append(query_seq[q_i:q_i + span])
            stick_parts.append(' ' * span)
            q_i += span
            n_i += span
    ref = ''.join(ref_parts)
    query = ''.join(query_parts)
    sticks = ''.join(stick_parts)

    qbases = n_m + n_mm + n_i
    rbases = n_m + n_mm + n_d
    ## bases of the query that are not in the alignment; since the barcode is
    ## the query these are pieces of barcode not found in the read
    n_u = len(query_seq) - qbases
    aln_len = n_m + n_mm + n_d + n_i
    assert qbases == alignment.q_end - alignment.q_pos and rbases == alignment.r_end - alignment.r_pos
    assert len(ref) == len(query)
    assert len(ref) == aln_len

    report = [alignment.q_name + '\n']
    report.append(
        "Ref (top): " + alignment.r_name + ' ' + str(alignment.r_pos) + '-' +
        str(alignment.r_end) + ' r_bases_aligned:' + str(rbases) +
        ' pct_r_bases_aligned:' + str(100.0 * rbases / len(ref_seq)) +
        ' refLen:' + str(len(ref_seq)) + ' bp\n')
    report.append(
        "Query (bottom): " + alignment.q_name + ' ' + str(alignment.q_pos) +
        '-' + str(alignment.q_end) + ' q_bases_aligned:' + str(qbases) +
        ' pct_q_bases_aligned:' + str(100.0 * qbases / len(query_seq)) +
        ' queryLen:' + str(len(query_seq)) + ' bp\n')
    report.append(' '.join([
        "AS:" + str(alignment.score),
        "Match:" + str(n_m),
        "Mismatch:" + str(n_mm),
        "Deletion:" + str(n_d),
        "Insertion:" + str(n_i),
        "AlignmentLength:" + str(len(ref)),
        "PercentIdentity:" + str(100.0 * n_m / aln_len),
        "Barcode_Unaligned:" + str(n_u),
        "PercentIdentity_with_unaligned:" + str(100.0 * n_m / (aln_len + n_u)),
    ]) + '\n')

    p = np.e**(-1 * alignment.score)
    n = len(ref_seq) * len(query_seq)
    evalue = n * p
    report.append(' '.join(
        ['n:' + str(n), 'p:' + str(p), 'e_value:' + str(evalue)]) + '\n')

    p_m = 1 - e_i - e_d - e_s
    Esum = e_s + e_i + e_d
    E_s = e_s / Esum
    E_i = e_i / Esum
    E_d = e_d / Esum
    ## since it is unclear if the unaligned were subs/dels/ins all three
    ## rates are used, each weighted by its share of the total
    p_minion_un = (e_s**(E_s * n_u)) * (e_i**(E_i * n_u)) * (e_d**(E_d * n_u))

    # Multinomial coefficient: choose positions for each event type in turn.
    left = aln_len
    NcK = nchoosek(left, n_m)
    left = left - n_m
    NcK = NcK * nchoosek(left, n_mm)
    left = left - n_mm
    NcK = NcK * nchoosek(left, n_i)
    left = left - n_i
    NcK = NcK * nchoosek(left, n_d)

    def minion_line(e_d_power, e_i_power):
        # One statistics line of the MinION model for the given exponents.
        ## There seems to be no reason to multiply the unaligned part by an
        ## nchoosek b/c those bases are fixed at the ends and correspond to
        ## some "single" unknown composite error.
        ## p_minion is not necessarily comparable when barcodes are different
        ## lengths - can divide by bc_len or q*r maybe.
        p_minion_aln = (p_m**n_m) * (e_s**n_mm) * (e_d**e_d_power) * (e_i**e_i_power)
        p_minion = p_minion_aln * p_minion_un
        norm_p_minion_aln = NcK * p_minion_aln
        norm_p_minion = norm_p_minion_aln * p_minion_un
        return ' '.join([
            'p_minion_aln:' + str(p_minion_aln),
            'p_minion_un:' + str(p_minion_un),
            'p_minion:' + str(p_minion),
            'norm_p_minion_aln:' + str(norm_p_minion_aln),
            'norm_p_minion:' + str(norm_p_minion),
        ]) + '\n'

    report.append(minion_line(n_d, n_i))
    ## actually since the read is the "ref" here, n_d and n_i are dels/ins
    ## from/in the read.  The barcode is the real "reference", meaning for
    ## minion probs we need e_i**n_d and e_d**n_i
    report.append(minion_line(n_i, n_d))

    n_err = n_mm + n_i + n_d
    binom_prob = nchoosek(aln_len, n_m) * (p_m**n_m) * ((1 - p_m)**(n_err))
    report.append('binom_prob_k_matches_in_alignment:' + str(binom_prob) + '\n')
    binom_prob = nchoosek(aln_len + n_u, n_m) * (p_m**n_m) * ((1 - p_m)**(n_err + n_u))
    report.append('binom_prob_k_matches_in_alignment_incl_unaligned_portions_of_barcode:' + str(binom_prob) + '\n')
    # in barcode only
    binom_prob = nchoosek(len(query_seq), n_m) * (p_m**n_m) * ((1 - p_m)**(len(query_seq) - n_m))
    report.append('binom_prob_k_matches_in_barcode:' + str(binom_prob) + '\n')

    if report_prob is not False:
        report.append("Marginalized Probability Given Barcode Set: " + str(report_prob) + "\n")

    for start in range(0, len(ref), blocksize):
        stop = start + blocksize
        report.append(ref[start:stop] + '\n')
        report.append(sticks[start:stop] + '\n')
        report.append(query[start:stop] + '\n\n')
    return ''.join(report)
from scipy.special import comb as nchoosek


#-----------------------------------------------------------------------------------
def l(n):
    """Return ``str(n)`` left-justified in a field of 20 characters."""
    return '{:<20}'.format(str(n))


#-----------------------------------------------------------------------------------
def r(n):
    """Return ``str(n)`` right-justified in a field of 20 characters."""
    return '{:>20}'.format(str(n))


# Print, for each k, the number of k-subsets of d items, obtained as the
# binomial(d, 0.5) probability scaled back up by 2**d.
d = 20
p = .5
q = .5
for k in range(d + 1):
    dCk = nchoosek(d, k, exact=True)
    count = dCk * p**k * q**(d - k)
    print(l(k) + r(count * 2**d))
def get_bernstein_basis(n, a, b):
    """Evaluate the ``n + 1`` Bernstein polynomials of degree ``n`` on ``[a, b]``.

    The polynomials are sampled at ``b - a + 1`` equally spaced points from
    ``a`` to ``b``; with ``t = (x - a) / (b - a)`` the ``k``-th entry is
    ``C(n, k) * t**k * (1 - t)**(n - k)``.

    Returns
    -------
    list of numpy.ndarray
        One array per ``k = 0..n``.
    """
    grid = np.linspace(a, b, b - a + 1)
    basis = []
    for k in range(n + 1):
        t = (grid - a) / (b - a)
        basis.append(nchoosek(n, k) * t**k * (1 - t)**(n - k))
    return basis