def logp_r(self, i: int) -> float:
    if self.verbose_prob:
        print(f"[{color_asgn('R')}] ", end='')
    intvls = self.pread.intvls
    I = intvls[i]
    if max(I.cb, I.ce) >= self.cp.depths['R']:
        if self.verbose_prob:
            print("> Global R-cov")
        return 0.
    # est_cnt = self._est_cov(i, I.b, s)
    l, r = self._find_nn(i, 'D', only_rel=True)
    # FIXME: reconsider coverage estimation
    if l is None and r is None:
        dcov_l = dcov_r = self.cp.depths['D']
    elif l is None:
        dcov_l = dcov_r = intvls[r].cb
    elif r is None:
        dcov_l = dcov_r = intvls[l].ce
    else:
        dcov_l, dcov_r = intvls[l].ce, intvls[r].cb
    rcov_l, rcov_r = int(dcov_l * self.cp.DR_RATIO), int(dcov_r * self.cp.DR_RATIO)
    if I.cb >= rcov_l or I.ce >= rcov_r:
        if self.verbose_prob:
            print(f"> Est R-cov (B: {I.cb} >= {rcov_l} or E: {I.ce} >= {rcov_r})")
        # FIXME: "slipping interval" in repeats
        return R_LOGP
    logp_l = binom.logpmf(I.cb, rcov_l, 1 - 0.01)  # TODO: use smaller n-sigma and use calc_logp
    logp_r = binom.logpmf(I.ce, rcov_r, 1 - 0.01)
    logp = logp_l + logp_r
    if self.verbose_prob:
        print(f"ER={logp_l:5.0f} + {logp_r:5.0f} -> logp={logp:5.0f}")
    return logp
def compute_likelihood(self, data, **kwargs):
    """The likelihood of the human data."""
    assert len(data) == 0
    alpha = self.value['alpha'].value[0]
    beta = self.value['beta'].value[0]
    llt = self.value['likelihood_temperature'].value
    pt = self.value['prior_temperature'].value

    # Compute each hypothesis' prior, fixed over all data.
    priors = np.ones(self.N_hyps) * self.prior_offset  # #h x 1 vector
    for nt in self.nts:  # sum over all nonterminals
        priors = priors + np.dot(np.log(self.value['rulep'][nt].value), self.Counts[nt].T)
    priors = priors - np.log(np.sum(np.exp(priors)))
    priors = priors / pt  # include prior temperature

    pos = 0  # what response are we on?
    likelihood = 0.0
    # for g in [randint(0, self.N_groups - 1) for _ in range(10)]
    for g in range(self.N_groups):
        posteriors = self.L[g] / llt + priors                    # posterior score
        posteriors = np.exp(posteriors - logsumexp(posteriors))  # posterior probability
        # Now compute the probability of the human data.
        for _ in range(self.GroupLength[g]):
            ps = (1 - alpha) * beta + alpha * np.dot(posteriors, self.ModelResponse[pos])
            likelihood += binom.logpmf(self.Nyes[pos], self.Ntrials[pos], ps)
            pos = pos + 1
    return likelihood
def _compute_log_likelihood(self, X):
    matrix = []
    lookup = {}  # cache of already-computed log-pmf values
    for x in X:
        row = []
        for i in range(self.n_components):
            total = 0
            for j in range(2):
                index = (x[j], self.n[j], self.p[j][i])
                if index in lookup:
                    total += lookup[index]
                else:
                    y = binom.logpmf(x[j], self.n[j], self.p[j][i])
                    lookup[index] = y
                    total += y
            row.append(total)
        matrix.append(row)
    return np.asarray(matrix)
def _compute_log_likelihood(self, X):
    matrix = []
    lookup = {}  # cache of already-computed log-pmf values
    for x in X:
        row = []
        for state in range(self.n_components):
            res = 0
            for dim in range(self.n_features):
                for comp in range(self.distr_magnitude):
                    index = (x[dim], self.n[dim], self.p[dim][state][comp])
                    if index in lookup:
                        res += lookup[index] * self.c[dim][state][comp]
                    else:
                        y = binom.logpmf(x[dim], self.n[dim], self.p[dim][state][comp])
                        lookup[index] = y
                        res += y * self.c[dim][state][comp]
            row.append(res)
        matrix.append(row)
    return np.asarray(matrix)
def dbinom(x, size=1, prob=0.5, log=False):
    """
    ============================================================================
    dbinom()
    ============================================================================
    Density function for the binomial distribution. Returns the probability of
    getting `x` successes out of `size` trials, given a probability of `prob`
    for each success.

    USAGE:
    dbinom(x, size, prob=0.5, log=False)
    pbinom(q, size, prob=0.5, lowertail=True, log=False)
    qbinom(p, size, prob=0.5, lowertail=True, log=False)
    rbinom(n=1, size=1, prob=0.5)

    :param x:    int, or array of ints. The number of successes.
    :param size: int. Number of trials.
    :param prob: float. Probability of a success.
    :param log:  bool. Return the log-probability?
    :return:     float or array. The (log-)probability mass at `x`.
    ============================================================================
    """
    # Note: scipy names the number-of-trials parameter `n`, where R uses `size`.
    if log:
        return binom.logpmf(x, n=size, p=prob, loc=0)
    else:
        return binom.pmf(x, n=size, p=prob)
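# A minimal usage sketch for the R-style wrapper above, assuming `binom` is
# imported as in the function body; values match R's dbinom(3, size=10, prob=0.5).
from scipy.stats import binom

print(dbinom(3, size=10, prob=0.5))            # 0.1171875
print(dbinom(3, size=10, prob=0.5, log=True))  # ~ -2.144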
def _log_binom_expect(self, n, p, scaled_renyi_fn, ldistr, rdistr):
    """
    Computes the logarithm of an expectation over a binomial distribution
    with parameters (n, p).

    Parameters
    ----------
    n : number, required
        Number of Bernoulli trials.
    p : number, required
        Probability of success.
    scaled_renyi_fn : function, required
        Function pointer to compute the scaled Renyi divergence (inside the
        Bernoulli expectation).
    ldistr : tuple or array, required
        Parameters of the left distribution (i.e., imposed by D).
    rdistr : tuple or array, required
        Parameters of the right distribution (i.e., imposed by D').

    Returns
    -------
    out : torch.Tensor
        Logarithm of the expectation of scaled_renyi_fn over the binomial
        distribution.
    """
    k = torch.arange(n + 1, dtype=torch.float)
    # log pmf values of Binomial(n, p) at k = 0..n
    log_binom_coefs = torch.tensor(binom.logpmf(k, n=n, p=p))
    return torch.logsumexp(log_binom_coefs + scaled_renyi_fn(k, ldistr, rdistr), dim=1)
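# Sanity-check sketch for _log_binom_expect above, reimplemented standalone:
# with a constant-zero scaled_renyi_fn the log-expectation is log(1) = 0,
# since the binomial pmf sums to one. (Assumes torch and scipy installed.)
import torch
from scipy.stats import binom

n, p = 10, 0.3
k = torch.arange(n + 1, dtype=torch.float)
log_binom_pmf = torch.tensor(binom.logpmf(k, n=n, p=p))
zero_fn = lambda k, ldistr, rdistr: torch.zeros(1, k.shape[0])
print(torch.logsumexp(log_binom_pmf + zero_fn(k, None, None), dim=1))  # ~0.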
def counts_log_likelihood(proportions, methylated, unmethylated, reference):
    # Probability of methylation at each site: proportion-weighted average
    # of the reference profiles.
    b = np.matmul(proportions, reference)
    ll = np.sum(binom.logpmf(methylated, methylated + unmethylated, b, loc=0))
    return -ll / 1000
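# Usage sketch for counts_log_likelihood above, with synthetic two-tissue
# data (all values hypothetical); the per-site success probability is the
# proportion-weighted average of the reference rows.
import numpy as np
from scipy.stats import binom

reference = np.array([[0.9, 0.1],   # rows: tissues, cols: CpG sites
                      [0.2, 0.8]])
proportions = np.array([0.5, 0.5])
methylated = np.array([55, 45])
unmethylated = np.array([45, 55])
print(counts_log_likelihood(proportions, methylated, unmethylated, reference))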
def logp_r(self, i: int, st_pred: CovsT) -> float:
    """Given an imaginary R-cov (and its position) larger than D-cov, compute
    the probability of transition from it to the `i`-th interval.
    If R-cov is larger, larger counts are required to have higher probability.
    If R-cov is smaller, smaller counts are classified as R.
    """
    I = self.intvls[i]
    beg_pos, beg_cnt, _, _ = self._expand_intvl(I)
    st = st_pred['R']
    logp_sf = -inf
    # logp_sf = calc_logp_trans(self._pred(st.pos), beg_pos,
    #                           st.cnt, beg_cnt,
    #                           st.cnt, self.cp.read_len)
    logp_er = (binom.logpmf(beg_cnt, st.cnt, 1 - 0.01)
               if beg_cnt < st.cnt else -inf)
    logp = max(logp_sf, logp_er)
    if self.verbose_prob:
        print(f"SF={logp_sf:5.0f}{'*' if logp_sf >= logp_er else ' '} "
              f"ER={logp_er:5.0f}{'*' if logp_er >= logp_sf else ' '}")
    # FIXME: revise the following code
    if logp > R_LOGP:
        return logp
    if max(I.ccb, I.cce) >= self.cp.depths['R']:
        if self.verbose_prob:
            print(' ' * 6 + "Counts >= Global R-cov")
        return R_LOGP
    if max(I.ccb, I.cce) >= st.cnt:
        if self.verbose_prob:
            print(' ' * 6 + "Counts >= Est R-cov")
        return R_LOGP
    return logp
def pdf_integral(p1, data):
    # Evaluate the integrand for a range of p1: the prior density at p1 times
    # the binomial likelihood, combined in log space for numerical stability.
    xj, nj, c, p2, var = data
    dens = pdf(p1, data=data[2:])
    return np.exp(np.log(dens) + binom.logpmf(xj, nj, p=p1))
def compute_likelihood(self, data, **kwargs):
    """The likelihood of the human data."""
    assert len(data) == 0

    # Compute each hypothesis' prior, fixed over all data.
    priors = np.ones(self.N_hyps) * self.prior_offset  # #h x 1 vector
    for nt in self.nts:  # sum over all nonterminals
        priors = priors + np.dot(np.log(self.value[nt].value), self.Counts[nt].T)
    priors = priors - np.log(np.sum(np.exp(priors)))

    pos = 0  # what response are we on?
    likelihood = 0.0
    for g in range(self.N_groups):
        posteriors = self.L[g] + priors                          # posterior score
        posteriors = np.exp(posteriors - logsumexp(posteriors))  # posterior probability
        # Now compute the probability of the human data.
        for _ in range(self.GroupLength[g]):
            ps = np.dot(posteriors, self.ModelResponse[pos])
            likelihood += binom.logpmf(self.Nyes[pos], self.Ntrials[pos], ps)
            pos = pos + 1
    return likelihood
def calc_logp(i: int, state: str, intervals: List[CountIntvl],
              assignments: List[str]) -> float:
    """Compute the probability that the state of `intvl` is `state` by
    calculating the smoothness of `intvl` given adjacent intervals of the
    same state.
    """
    intvl = intervals[i]
    if state == 'E':
        return (logp_poisson(intvl.start.count, mean_depths[state])
                + logp_poisson(intvl.end.count, mean_depths[state]))
    elif state in ('H', 'D'):
        # p_depth, n_depth = calc_neighbor_depth(i, n_boundary, 'D',
        #                                        intervals, assignments)
        # assert p_depth >= 0 or n_depth < len(intervals), "No diploid states"
        # if p_depth < 0:
        #     p_depth = n_depth
        # elif n_depth >= len(intervals):
        #     n_depth = p_depth
        p, n = find_nearest(i, state, intervals, assignments)
        if p < 0 and n >= len(intervals):
            return -np.inf
        prev_count = (intervals[p].end.count if p >= 0
                      else intervals[n].start.count)
        next_count = (intervals[n].start.count if n < len(intervals)
                      else intervals[p].end.count)
        return (binom.logpmf(min(intvl.start.count, prev_count),
                             max(intvl.start.count, prev_count), 0.92)
                + binom.logpmf(min(intvl.end.count, next_count),
                               max(intvl.end.count, next_count), 0.92))
    else:  # 'R'
        p_depth, n_depth = calc_neighbor_depth(i, n_boundary, 'D',
                                               intervals, assignments)
        assert p_depth >= 0 or n_depth < len(intervals), \
            "No diploid states"
        if p_depth < 0:
            p_depth = n_depth
        elif n_depth >= len(intervals):
            n_depth = p_depth
        if (p_depth + mean_depths['H'] / 2 <= intvl.start.count
                or n_depth + mean_depths['H'] / 2 <= intvl.end.count):
            return np.inf
        else:
            return -np.inf
def ML_Bin(data, model_pred, threshold=5, approx=True, factor=1):
    """
    Calculates the log-likelihood of the binomial approximation of the
    illness measurements in Israel. It assumes the number of tests is
    n_{j,k,t}, the probability of a positive result is p_{j,k,t} (the model
    prediction), and the data point is q_{j,k,t}; in total the likelihood is
    P(X=q) ~ Bin(n, p) per data point. Cells (specific t,j,k triplets) with
    an insufficient number of tests (n_{j,k,t} < threshold) are ignored.

    :param data: np.array of 4 dimensions:
                 axis 0: n, q - total tests, then positive rate.
                 axis 1: t - time of sample, starting from the first day in
                         question, calibrated to the model.
                 axis 2: k - area index.
                 axis 3: j - age index.
                 data should be smoothed (filled with zeros where no test occurred).
    :param model_pred: np.ndarray of 3 dimensions representing the probability:
                 axis 0: t - time of sample, starting from the first day in
                         question, calibrated to the model.
                 axis 1: k - area index.
                 axis 2: j - age index.
    :return: the negative log-likelihood of the data given the model prediction.
    """
    n = data[0, :, :]
    q = factor * data[1, :, :]
    p = model_pred
    if approx:
        # Poisson approximation.
        ll = -poisson.logpmf(k=n * q, mu=n * p)
    else:
        # Binomial distribution.
        ll = -binom.logpmf(k=n * q, n=n, p=p)
    # Zero out non-finite values and cells below the test-count threshold.
    ll = np.nan_to_num(ll, nan=0, posinf=0, neginf=0)
    ll = ll * (n > threshold)
    return ll.sum()
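# A minimal usage sketch for ML_Bin above, on synthetic data (all shapes and
# values hypothetical; the area/age axes are folded into one). Using n = 16
# everywhere keeps n * q exactly integral, which the binomial branch needs.
import numpy as np
from scipy.stats import binom, poisson

rng = np.random.default_rng(0)
T, K = 7, 3
n = np.full((T, K), 16.0)                       # tests per day/area
p_true = np.full((T, K), 0.1)                   # model-predicted positive rate
successes = rng.binomial(16, 0.1, size=(T, K))
data = np.stack([n, successes / n])             # axis 0: n, then rate q
print(ML_Bin(data, p_true, threshold=5, approx=False))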
def null_loglike(self):
    types, tokens = self.endog, self.exog
    projected_n_types = np.median(self.ttrs) * tokens.reshape((-1,))
    p = .5
    binom_ns = np.floor((1 / p) * projected_n_types)
    logprobs = list(binom.logpmf(t, bn, p) for t, bn in zip(types, binom_ns))
    logprobs_clipped = np.clip(logprobs, -10**6, 0)
    return sum(logprobs_clipped)
def _plumb_mle(self, parameters):
    days = self._fittingPeriod[1] - self._fittingPeriod[0]
    params = dict(zip(self._paramNames, parameters))

    if self._stochastic:
        # Stochastic: run several experiments, each with some randomness
        # inside, and take the mean.
        experiments = []  # dims: [experiment #][day][value]
        for i in range(self._nbExperiments):
            res = self.predict(end=days, parameters=params)
            experiments.append(res)
        experiments = np.stack(experiments)
    else:
        res = self.predict(end=days, parameters=params)

    lhs = dict()
    for state, obs, param in [(StateEnum.SYMPTOMATIQUE, ObsEnum.DHDT, params['Tau']),
                              (StateEnum.DSPDT, ObsEnum.NUM_TESTED, params['Mu']),
                              (StateEnum.DTESTEDDT, ObsEnum.NUM_POSITIVE, params['Eta']),
                              (StateEnum.CRITICAL, ObsEnum.DFDT, params['Theta'])]:
        # That is: 1) from the predicted number of symptomatic people and the
        # parameter tau, check how probable the observation dhdt is;
        # 2) from the predicted number of critical people and the parameter
        # theta, check how probable the observation dfdt is;
        # 3) on the transition from asymptomatic to symptomatic
        # (sigma*A -> dSPdt), with the testing parameter mu, check how
        # probable the observation num_tested is.
        log_likelihood = 0
        for day in np.arange(0, days):
            # Take all the values of experiments on a given day
            # for a given measurement (state.value).
            observation = max(1, self._data[day + self._fittingPeriod[0]][obs.value])
            prediction = None
            if self._stochastic:
                values = experiments[:, day, state.value]  # binomial
                prediction = np.mean(values)
            else:
                prediction = res[day, state.value]
            try:
                log_bin = binom.logpmf(observation, np.round(np.mean(prediction)), param)
                if prediction == 0:
                    log_bin = 0
            except FloatingPointError:
                log_bin = -999
            log_likelihood += log_bin
        lhs[obs] = log_likelihood
    return -sum(lhs.values())
def get_log_value(x, distr):
    if distr['distr_name'] == 'binomial':
        if x in lookup_pmf:
            return lookup_pmf[x]
        v = binom.logpmf(x, distr['n'], distr['p'])
        lookup_pmf[x] = v
        return v
    if distr['distr_name'] == 'nb':
        return distr['distr'].logpdf(x)
def get_sex(sample, Nx, Na, Lx, La):
    Rx = float(Nx) / (Nx + Na)
    # Beta CI with non-informative prior, aka Jeffreys interval.
    # See Brown, Cai, and DasGupta (2001). doi:10.1214/ss/1009213286
    Rx_CI = beta.interval(0.99, Nx + 0.5, Na + 0.5)

    # Expected ratios from the chromosome lengths.
    Elx_X0 = float(Lx) / (Lx + 2 * La)
    Elx_XX = float(Lx) / (Lx + La)

    #ll_x0 = beta.logpdf(Elx_X0, Nx+0.5, Na+0.5)
    #ll_xx = beta.logpdf(Elx_XX, Nx+0.5, Na+0.5)
    ll_x0 = binom.logpmf(Nx, Nx + Na, Elx_X0)
    ll_xx = binom.logpmf(Nx, Nx + Na, Elx_XX)

    # Likelihood ratio test.
    alpha = 0.001
    if chi2.sf(2 * (ll_x0 - ll_xx), 1) < alpha:
        sex = 'M'
    elif chi2.sf(2 * (ll_xx - ll_x0), 1) < alpha:
        sex = 'F'
    else:
        # Indeterminate.
        sex = 'U'

    if ll_x0 > ll_xx:
        Elx = 2 * Elx_X0
    else:
        Elx = Elx_XX

    Mx = Rx / Elx
    Mx_CI = [Rx_CI[0] / Elx, Rx_CI[1] / Elx]

    if Mx < 0.4 or Mx > 1.2:
        #print("Warning: {} has unexpected Mx={:g}".format(sample, Mx), file=sys.stderr)
        pass
    if Mx > 0.6 and Mx < 0.8:
        # Suspicious sample, may be contaminated.
        sex = 'U'

    return Elx, Mx, Mx_CI, sex
def nll(params):  # expects a tuple from the minimizer
    """Negative log likelihood function."""
    a, b, g, l = params
    res = p_func(stim, a, b, g, l)
    # p = np.nan_to_num(binom.pmf(n, m, res))
    # log_p = np.nan_to_num(np.log(p))
    # Underflow of 'p' causes this to go to -infinity, which I hate.
    return -np.sum(binom.logpmf(n, m, res))
def get_candidate_del_loci(hap_cov, transition_prob=1e-2, het_read_prob=0.9):
    sel_cols = ['cov_q30_hap' + str(i) for i in range(3)]
    #hap_cov['total_cov'] = hap_cov[sel_cols].sum(axis=1)
    hap_cov['total_cov'] = hap_cov["cov_q30_hap0"] + hap_cov["cov_q30_hap1"]
    npos = len(hap_cov)

    ### Emission probabilities
    em_probs = np.ones((npos, 2)) * MIN_LOG_PROB
    # Emission given no del.
    em_probs[:, 0] = np.maximum(MIN_LOG_PROB,
                                binom.logpmf(np.maximum(hap_cov.cov_q30_hap0, hap_cov.cov_q30_hap1),
                                             np.array(hap_cov.total_cov), 0.65))
    em_probs[:, 1] = np.maximum(MIN_LOG_PROB,
                                binom.logpmf(np.maximum(hap_cov.cov_q30_hap0, hap_cov.cov_q30_hap1),
                                             np.array(hap_cov.total_cov), het_read_prob))

    ### Transition probabilities: trans_probs[cur, prev]
    # [[no-del -> no-del, del -> no-del],
    #  [no-del -> del,    del -> del]]
    trans_probs = np.array([[1 - transition_prob, transition_prob],
                            [transition_prob, 1 - transition_prob]])
    trans_probs = np.log(trans_probs)

    ### Prior state probabilities (prob of starting on a del or no del)
    del_prior_prob = DEL_PRIOR_PROB
    priors = np.array([1 - del_prior_prob, del_prior_prob])
    priors = np.log(priors)

    max_probs = np.zeros((npos, 2))
    # Integer dtype: these values are used as indices during backtracking.
    max_state = np.zeros((npos, 2), dtype=int)
    max_probs[0, :] = priors + em_probs[0, :]
    for i in range(1, npos):
        # max_probs[i-1, 0] is the probability of the most probable path
        # (i.e. hidden state sequence) that ends at position i-1 with a 0.
        new_probs = max_probs[i - 1, :] + trans_probs
        max_probs[i, :] = em_probs[i, :] + np.max(new_probs, axis=1)
        max_state[i, :] = np.argmax(new_probs, axis=1)

    best_path = np.zeros((npos,), dtype=int)
    best_path[-1] = np.argmax(max_probs[-1, :])
    for i in range(npos - 2, 0, -1):
        best_path[i] = max_state[i, best_path[i + 1]]
    return best_path
def metric_computational(counts: dict, shots: int) -> np.float_:
    """The negative log-likelihood of the 01 and 10 counts assuming a binomial
    probability distribution with equal probability.

    Args:
        counts: a dict of counts keyed by measurement bitstrings
        shots: the total number of shots

    Returns:
        the metric value
    """
    return -binom.logpmf([counts.get('01', 0), counts.get('10', 0)],
                         n=shots, p=0.5).sum()
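# Usage sketch for metric_computational (counts dict is hypothetical): an
# even 50/50 split of '01'/'10' over 100 shots sits at the mode of
# Binomial(100, 0.5), so the NLL is near its minimum (~5.06).
counts = {'00': 0, '01': 50, '10': 50, '11': 0}
print(metric_computational(counts, shots=100))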
def log_binomial_likelihood(k, n, mu):
    # Example:
    #   k:  array([2, 3, 2])
    #   n:  array([2, 3, 2])
    #   mu: [0.3, 0.2]
    # Returns a (len(k), len(mu)) matrix of log-probabilities.
    nn = n * np.ones((mu.shape[0], n.shape[0]))
    kk = k * np.ones((mu.shape[0], k.shape[0]))
    mumu = mu[np.newaxis, :].T * np.ones((mu.shape[0], n.shape[0]))
    ll = binom.logpmf(kk, nn, mumu)
    return ll.transpose()
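# Shape sketch for log_binomial_likelihood above: with 3 (k, n) pairs and 2
# candidate frequencies, the result is a 3 x 2 matrix of log-likelihoods.
import numpy as np
from scipy.stats import binom

k = np.array([2, 3, 2])
n = np.array([2, 3, 2])
mu = np.array([0.3, 0.2])
print(log_binomial_likelihood(k, n, mu).shape)  # (3, 2)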
def _compute_log_likelihood(self, X):
    """Return the log of the binomial probability density.

    Needs to return a matrix of shape (n_obs in sequence, n_components).
    In order to accommodate having variable probabilities in each bin along
    the genome (and having the (X, lengths) model of hmmlearn), X and p are
    synthetically combined so that they are divided up along the chromosomes
    together when hmmlearn calls iter_from_X_lengths().

    Thus, X is combined binomial counts (col 1), size (col 2), and
    probabilities (cols 3 + 4). xs and ns have shape
    (n_obs in sequence, n_features==1). ps is the emission probabilities and
    has shape (n_components [states] in HMM == 2, n_features==1)."""
    assert type(X).__module__ == "numpy"
    xs = X[:, 0]
    ns = X[:, 1]
    ps = X[:, 2:4]
    ref = binom.logpmf(xs, ns, ps[:, 0])
    nonref = binom.logpmf(xs, ns, ps[:, 1])
    # np.column_stack avoids the deprecated np.matrix type.
    return np.column_stack([ref, nonref])
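# Input-layout sketch for the hmmlearn wrapper above: each row of X packs
# [count, size, p_state0, p_state1], and this standalone reimplementation
# shows the (n_obs, n_components) result (values hypothetical).
import numpy as np
from scipy.stats import binom

X = np.array([[3., 10., 0.2, 0.8],
              [7., 10., 0.2, 0.8]])
ref = binom.logpmf(X[:, 0], X[:, 1], X[:, 2])
nonref = binom.logpmf(X[:, 0], X[:, 1], X[:, 3])
print(np.column_stack([ref, nonref]))  # shape (2, 2)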
def predict(self, Y, X, parameter_sample):
    probs = expit(np.matmul(X, np.transpose(parameter_sample)))
    # Get the predictive log likelihood.
    predictive_log_likelihoods = binom.logpmf(Y[:, None], max(Y), probs)
    # Calculate squared and absolute error.
    SE = (Y[:, None] - max(Y) * probs)**2
    AE = np.abs(Y[:, None] - max(Y) * probs)
    return predictive_log_likelihoods, SE, AE
def likelihood(x):
    def elo(delta):
        return 1.0 / (1 + 10.0**(delta / 400.0))

    p_win = elo(x[nz[1]] - x[nz[0]])
    p = binom.logpmf(wins[nz], count[nz], p_win)
    assert (p_win < 1).all()  # check that we do not predict any perfect winners
    assert (p < 0).all()  # log-probabilities must be strictly negative (pmf < 1)
    return -0.5 * np.sum(p)
def get_snv_log_likelihood(a_vec, d_vec, F, num_clusts, num_samples):
    # Calculate the likelihood of the SNV coming from any of the clusters.
    cluster_likelihoods = []
    for i in range(num_clusts):
        clust_ll = 0
        for j in range(num_samples):
            freq = min(1, F.item((i, j)) + 0.00001)
            likelihood = binom.logpmf(a_vec[j], d_vec[j], freq)
            clust_ll += likelihood
        if not (np.isnan(clust_ll) or clust_ll == float("-inf")):
            cluster_likelihoods.append(clust_ll)
    return logsumexp(cluster_likelihoods)
def logp_r_short(i, intvls, asgn, profile, DEPTHS, verbose, n_sigma=1):
    if verbose:
        print("### REPEAT ###")
    ib, ie = intvls[i]
    if max(profile[ib], profile[ie - 1]) >= DEPTHS['R']:
        return 0.
    #pc, nc = estimate_true_counts_intvl(i, 'D', 'b', intvls, asgn, profile)
    pc, nc = estimate_true_counts(i, 'D', 'b', intvls, asgn, profile)
    #p, n = nn_intvl(i, 'D', 'b', asgn)
    #pc, nc = profile[intvls[p][1] - 1] if p >= 0 else -1, profile[intvls[n][0]] if n < len(intvls) else -1
    if pc == -1 and nc == -1:
        #pc, nc = estimate_true_counts_intvl(i, 'H', 'b', intvls, asgn, profile)
        pc, nc = estimate_true_counts(i, 'H', 'b', intvls, asgn, profile)
        #p, n = nn_intvl(i, 'H', 'b', asgn)
        #pc, nc = profile[intvls[p][1] - 1] if p >= 0 else -1, profile[intvls[n][0]] if n < len(intvls) else -1
        if pc == -1 and nc == -1:
            pc, nc = DEPTHS['D'], DEPTHS['D']
        elif pc == -1:
            pc = nc
        elif nc == -1:
            nc = pc
    elif pc == -1:
        pc = nc
    elif nc == -1:
        nc = pc
    dr_ratio = 1 + n_sigma * (1 / np.sqrt(DEPTHS['D']))  # X-sigma interval
    pc, nc = pc * dr_ratio, nc * dr_ratio
    if verbose:
        print(f"[LEFT] R_est={pc}, {profile[ib]} ~ [RIGHT] R_est={nc}, {profile[ie - 1]}")
    if profile[ib] >= pc or profile[ie - 1] >= nc:
        return 0.
    else:
        return (binom.logpmf(profile[ib], pc, 1 - 0.01)
                + binom.logpmf(profile[ie - 1], nc, 1 - 0.01))
def fitDeathValuesToData(country, theta, psi, model_output, args):
    mortal_idx = 1
    if args.use_infected:
        mortal_idx = 0
    start_t, end_t = country.getFullModelTimespan()
    N = country.pop_size
    # Add psi zeros to the front of the zvals array.
    historic_zvals = np.concatenate((np.zeros(psi), model_output[:-psi, mortal_idx]))
    recorded_deaths = country.getRecordedMortalityValues()
    #country.country_data[country.country_data.daysSinceOrigin.between(start_t,end_t)].deaths.values
    log_likelihood = sum(binom.logpmf(recorded_deaths, N * historic_zvals, theta))
    return -log_likelihood
def counts_log_likelihood(alpha_est, X, X_depth, gamma):
    """
    Calculates a binomial log likelihood.

    :param array alpha_est: estimate of the cell type proportions
    :param array X: methylation counts for cfDNA input
    :param array X_depth: total depths for cfDNA input
    :param array gamma: reference methylation proportions
    """
    # Compute the projection of the estimates.
    alpha_est = compute_projection(alpha_est).flatten()
    # The probability that a cfDNA CpG comes from a reference tissue is the
    # weighted average of the estimates of the tissues contributing to that
    # person.
    b = np.matmul(alpha_est, gamma)
    ll = np.sum(binom.logpmf(X, X_depth, b, loc=0))  # log likelihood
    return -ll  # optimize negative ll
def get_maxll_cluster(a_vec, d_vec, F, num_clusts, num_samples):
    # Find the cluster with the maximum likelihood of generating the SNV.
    maxll = float("-inf")
    max_clust = None
    for i in range(num_clusts):
        clust_ll = 0
        for j in range(num_samples):
            freq = min(1, F.item((i, j)) + 0.00001)
            likelihood = binom.logpmf(a_vec[j], d_vec[j], freq / 2.)
            clust_ll += likelihood
        if clust_ll >= maxll:
            maxll = clust_ll
            max_clust = i
    return max_clust
def loglik2(Sigmai, ps, xs, ns, trans=transf):
    ps2 = trans(ps)
    p_start = ps2[0, :]
    p_rest = ps2[1:, :]
    p_diffs = p_rest - p_start
    p_diffs_scaled = p_diffs / np.sqrt(p_start * (1.0 - p_start))
    n, N = p_diffs.shape
    binomial_part = np.sum(binom.logpmf(xs, ns, ps2))
    normal_part = 0
    for j in range(N):
        p0 = p_start[j]
        normal_part += multivariate_normal.logpdf(p_diffs[:, j],
                                                  mean=np.array([0] * n),
                                                  cov=np.linalg.inv(Sigmai) * p0 * (1 - p0))
    return binomial_part + normal_part
def test_binom():
    # Test that we match a binomial distribution from scipy.
    p = 0.5
    n = 5
    dist = lk.Binomial()
    x = np.random.randint(low=0, high=n, size=(10,))

    p1 = binom.logpmf(x, p=p, n=n)
    p2 = dist.loglike(x, p, n)
    assert np.allclose(p1, p2)

    p1 = binom.cdf(x, p=p, n=n)
    p2 = dist.cdf(x, p, n)
    assert np.allclose(p1, p2)
def _compute_log_likelihood(self, X):
    res = []
    for x in X:  # over all observations
        row = []
        for i in range(self.n_components):  # over the HMM's states
            r_sum = 0
            for j in range(self.n_features):  # over dimensions
                # Grab the proper observation indices for this dimension.
                it = (range(self.dim[0]) if j == 0
                      else range(self.dim[0], self.dim[0] + self.dim[1]))
                for k in it:
                    index = (int(x[k]), self.p[j][i], self.n[j])
                    if index not in self.lookup_logpmf:
                        self.lookup_logpmf[index] = binom.logpmf(x[k], self.n[j], self.p[j][i])
                    r_sum += self.lookup_logpmf[index]
            row.append(r_sum)
        res.append(row)
    return np.asarray(res)
def mle(infile):
    '''
    Estimate the overall contamination percentage using maximum likelihood
    estimation (MLE).
    '''
    candidate_PIs = [i / 1000.0 for i in range(0, 501)]
    snp_hom = []
    snp_het = []
    for l in open(infile, 'r'):
        l = l.strip()
        if l.startswith('Chrom'):
            continue
        f = l.split()
        if f[12] == 'Fail':
            continue
        allele_1_count = int(f[3])
        allele_2_count = int(f[5])
        if f[9] == 'Hom':
            snp_hom.append([allele_1_count + allele_2_count, allele_2_count, "Hom"])  # n, k
        elif f[9] == 'Het':
            snp_het.append([allele_1_count + allele_2_count, allele_2_count, "Het"])  # n, k
        else:
            continue

    print('@ ' + strftime("%Y-%m-%d %H:%M:%S")
          + ": Estimating contamination from homozygous SNPs ...", file=sys.stderr)
    prob = -float("inf")
    pi_of_max_prob_hom = 0.0
    for pi in candidate_PIs:
        p2 = pi / 2.0
        joint_prob = 0
        for n, k, t in snp_hom:
            pmf_2 = binom.logpmf(k, n, p2)
            joint_prob += pmf_2
        if joint_prob > prob:
            prob = joint_prob
            pi_of_max_prob_hom = pi
    return pi_of_max_prob_hom
def loglike(self, params):
    K, beta = params
    if beta > 1. or K < 1:
        return -np.inf
    types, tokens = self.endog, self.exog
    # V(n) = K * n**beta
    projected_n_types = K * tokens**beta
    p = .5
    # binom mode = floor((n+1)*p), so binom_n = floor(1/p * n)
    binom_ns = np.floor((1 / p) * projected_n_types)
    logprobs = list(binom.logpmf(t, bn, p)[0] for t, bn in zip(types, binom_ns))
    logprobs_clipped = np.clip(logprobs, -10**6, 0)
    return sum(logprobs_clipped)  # - beta*1000
def _loss_fun(pars, x, k, n, S, fixed):
    """
    A binomial loss function. Returns the negative log likelihood for k
    successes in n binomial trials at stimulus level x, fit with
    psychometric function S.

    :param pars: the vector of parameters to be fit. Order = (m, w, lam, gam).
    :param x: the stimulus level; if S is a weibull, should be in log units.
    :param k: number of successes.
    :param n: number of trials.
    :param S: the unscaled sigmoid to fit; a function taking (x, m, w) as input.
    :param fixed: dictionary of values for fixed params, e.g.
        {'lam': 0, 'gam': 0.5} for a 2AFC with no lapse rate.
    :returns: the negative of the summed log likelihoods.
    """
    yhat = psy_pred(pars, x, S, fixed)
    ll = binom.logpmf(k, n, yhat)
    return -ll.sum()
def loglike(self, y, f, n):
    r"""
    Binomial log likelihood.

    Parameters
    ----------
    y: ndarray
        array of integer success counts (0..n) of targets
    f: ndarray
        latent function from the GLM prior
        (:math:`\mathbf{f} = \boldsymbol\Phi \mathbf{w}`)
    n: ndarray
        the total number of observations

    Returns
    -------
    logp: ndarray
        the log likelihood of each y given each f under this likelihood.
    """
    ll = binom.logpmf(y, n=n, p=expit(f))
    return ll
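# Numerical sketch for the GLM binomial likelihood above: at f = 0 the link
# gives expit(0) = 0.5, so the result equals the Binomial(n, 0.5) log-pmf.
import numpy as np
from scipy.stats import binom
from scipy.special import expit

y = np.array([3, 5])
n = np.array([10, 10])
print(binom.logpmf(y, n=n, p=expit(0.0)))  # what loglike(y, f=0, n) returns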
def loglik_fermi(x0, ntrig, nall, r, E):
    return -1. * binom.logpmf(ntrig, nall, fermi(r, E, x0)).sum()
def _choice_traj_likelihood(tau, p_0, p_1, q, n, t):
    if tau < 0:
        return np.inf
    p_traj = _exp_choice_traj(tau, p_0, p_1, t)
    log_lik = binom.logpmf(q, n, p_traj).sum()
    return -log_lik
def data_log_likelihood(self, successes, trials, beta):
    '''Calculates the log-likelihood of a Polya tree bin given the beta values.'''
    # The logistic transform 1/(1 + exp(-beta)) maps beta to a success probability.
    return binom.logpmf(successes, trials, 1.0 / (1 + np.exp(-beta))).sum()
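# Numerical sketch of the Polya-tree bin likelihood above: beta = 0 maps to
# success probability 1/(1 + exp(0)) = 0.5, so one bin with 3 successes in
# 10 trials scores binom.logpmf(3, 10, 0.5) ~ -2.144.
import numpy as np
from scipy.stats import binom

beta = 0.0
print(binom.logpmf(3, 10, 1.0 / (1 + np.exp(-beta))))  # ~ -2.144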
def _logp(self, value, p, k):
    # Note: `k` here is the number of trials (scipy's `n`).
    return np.sum(binom.logpmf(value, k, p, loc=0))
def loglik_binom(x0, ntrig, nall, r, E):
    return -1. * binom.logpmf(ntrig, nall, inverrf(r, E, x0)).sum()