def update_data_emission_matrix_using_negative_binomial(
        d, dshared, phis, mus, data, index, timepoint):
    """
    Update the data emission matrix based on the negative binomial distribution.

    This function allows using a different phi and mu for every single state.

    phis: an array containing phi for every non-silent state
    mus: an array containing mu for every non-silent state
    data: an array of integers

    The updated matrix is written back to d['data_emission_matrix'].
    """
    data_emission_matrix = d['data_emission_matrix']
    # Memoize pmf values per (phi, mu) pair so each observed value is evaluated once.
    dictionary = {}
    for i in range(dshared['n_obs']):
        for j in range(dshared['silent_states_begin']):
            if (phis[j], mus[j]) not in dictionary:
                dictionary[(phis[j], mus[j])] = {}
                p = phis[j] / (mus[j] + phis[j])
                dictionary[(phis[j], mus[j])][data[i]] = nbinom.pmf(
                    data[i], phis[j], p)
            elif data[i] not in dictionary[(phis[j], mus[j])]:
                p = phis[j] / (mus[j] + phis[j])
                dictionary[(phis[j], mus[j])][data[i]] = nbinom.pmf(
                    data[i], phis[j], p)
            data_emission_matrix[index][i][j] *= dictionary[(phis[j], mus[j])][data[i]]
    d['data_emission_matrix'] = data_emission_matrix
def p_n1_pl_n2(n, theta, m, t1, t2):
    """Probability that N1 + N2 = n, where N1 ~ NB(m, theta/(t1+theta)) and
    N2 ~ NB(m, theta/(t2+theta)), computed by direct convolution."""
    summ = 0
    p1 = theta / (t1 + theta)
    p2 = theta / (t2 + theta)
    for j in range(n + 1):
        summ += nbinom.pmf(j, m, p1) * nbinom.pmf(n - j, m, p2)
    return summ
def getllxtensor_singleroi(roi: str, data_path: str, fits_path: str, models_path: str, model_name: str, fit_format: int) -> np.array: """Recompute a single log-likelihood tensor (n_samples x n_datapoints). Args: roi (str): A single ROI, e.g. "US_MI" or "Greece". data_path (str): Full path to the data directory. fits_path (str): Full path to the fits directory. models_path (str): Full path to the models directory. model_name (str): The model name (without the '.stan' suffix). fit_format (int): The .csv (0) or .pkl (1) fit format. Returns: np.array: The log-likelihood tensor. """ csv_path = Path(data_path) / ("covidtimeseries_%s_.csv" % roi) df = pd.read_csv(csv_path) t0 = np.where(df["new_cases"].values > 1)[0][0] y = df[['new_cases', 'new_recover', 'new_deaths']].to_numpy()\ .astype(int)[t0:, :] # load samples samples = extract_samples(fits_path, models_path, model_name, roi, fit_format) S = np.shape(samples['lambda[0,0]'])[0] # print(S) # get number of observations, check against data above for i in range(1000, 0, -1): # Search for it from latest to earliest candidate = '%s[%d,0]' % ('lambda', i) if candidate in samples: N = i + 1 # N observations, add 1 since index starts at 0 break # And move on print(N) # run using old data print(len(y)) llx = np.zeros((S, N, 3)) # # conversion from Stan neg_binom2(n_stan | mu,phi) # to scipy.stats.nbinom(k,n_scipy,p) # # n_scipy = phi, p = phi/mu, k = n_stan # t0 = time.time() for i in range(S): phi = samples['phi'][i] for j in range(N): mu = max(samples['lambda[' + str(j) + ',0]'][i], 1) llx[i, j, 0] = np.log(nbinom.pmf(max(y[j, 0], 0), phi, phi / mu)) mu = max(samples['lambda[' + str(j) + ',1]'][i], 1) llx[i, j, 1] = np.log(nbinom.pmf(max(y[j, 1], 0), phi, phi / mu)) mu = max(samples['lambda[' + str(j) + ',2]'][i], 1) llx[i, j, 2] = np.log(nbinom.pmf(max(y[j, 2], 0), phi, phi / mu)) print(np.sum(llx[i, :, :])) print(samples['ll_'][i]) print('--') return llx
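# A minimal sketch (not part of the snippet above): the comment there describes
# converting Stan's neg_binomial_2(mu, phi) onto scipy.stats.nbinom. Under the
# commonly used mapping n = phi, p = phi / (phi + mu), the resulting distribution
# has mean mu and variance mu + mu**2 / phi; the values below are purely illustrative.
import numpy as np
from scipy.stats import nbinom

mu, phi = 12.0, 3.0
n, p = phi, phi / (phi + mu)
assert np.isclose(nbinom.mean(n, p), mu)
assert np.isclose(nbinom.var(n, p), mu + mu ** 2 / phi)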
def nll_glm(params, y, Xg, Dg=None, nzmean=None, diversity=None, dist="nb"): """ Negative log-likelihood of the ZINB-GLM model """ eps = 1e-20 # A little epsilon to avoid errors pg = params[0] ag, yg = np.split(params[1:], 2) # Convert to compatible shapes, column vectors ag = ag.reshape(-1, 1) yg = yg.reshape(-1, 1) n_reads = y[y == 0].reshape(-1, 1) y_reads = y[y > 0].reshape(-1, 1) # Use the formulas to get distribution parameters n = np.exp(pg) # Dispersion parameters # Mean is estimated using ag if there is no nzmean arg mu = np.exp(Xg.dot(ag)) # Multiply mean offsets, ag estimates a ratio of the expected mean over non-zero mean if nzmean is not None: mu = np.multiply(mu, Dg.dot(nzmean.reshape(-1, 1))) # Extraneous is estimated by yg yg = Xg.dot(yg) # Add extraneous offset, yg estimates the change from the average diversity if diversity is not None: temp = Dg.dot(diversity.reshape(-1, 1)) temp[temp <= 0] = eps temp[temp >= 1] = 1 - eps yg += np.log( temp / (1 - temp)) # logit function, hopefully this is never 0 or 1 pi = 1 / (1 + np.exp(-yg)) # Sigmoid, inverse of logit function p = n / (mu + n) # ZINB (Equation 2) if dist == "nb": # Negative Binomial n_reads = pi[y == 0] + (1.0 - pi[y == 0]) * nbinom.pmf( n_reads, n, p[y == 0]) y_reads = (1.0 - pi[y > 0]) * nbinom.pmf(y_reads, n, p[y > 0]) elif dist == "norm": # Normal # "Normal Approximation to the Negative Binomial" by statisticsmatt # https://www.youtube.com/watch?v=JhmmbgLLVkQ var = n * (1 - p) / (p * p) sd = np.sqrt(var) n_reads = pi[y == 0] + (1.0 - pi[y == 0]) * norm.pdf( n_reads, mu[y == 0], sd[y == 0]) y_reads = (1.0 - pi[y > 0]) * norm.pdf(y_reads, mu[y > 0], sd[y > 0]) # Compute the negative log-likelihood """ https://stackoverflow.com/questions/5124376/convert-nan-value-to-zero/5124409 """ # The stackoverflow answer was ok, but sometimes values are still 0 # I added a little epsilon and everything seems to work better. n_reads = np.nan_to_num(np.log(n_reads + eps)) y_reads = np.nan_to_num(np.log(y_reads + eps)) nll = -(np.sum(n_reads) + np.sum(y_reads)) return nll
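# A standalone sketch (hypothetical helper, not from the source) of the zero-inflated
# negative binomial pmf that nll_glm evaluates piecewise above: zero counts receive
# pi + (1 - pi) * NB(0; n, p), positive counts receive (1 - pi) * NB(y; n, p).
import numpy as np
from scipy.stats import nbinom


def zinb_pmf(y, n, p, pi):
    """Zero-inflated NB pmf: a point mass pi at zero mixed with (1 - pi) * NB(n, p)."""
    y = np.asarray(y)
    base = nbinom.pmf(y, n, p)
    return np.where(y == 0, pi + (1.0 - pi) * base, (1.0 - pi) * base)


# The mixture should still sum to ~1 over a wide enough support (illustrative values).
assert np.isclose(zinb_pmf(np.arange(500), n=2.0, p=0.3, pi=0.2).sum(), 1.0)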
def compute_likelihood(failures_before_five_successes, theta1_range, theta2_range):
    """
    Computes the likelihood over the range (0, 1) for two theta parameters.
    The likelihood is modeled by a Negative Binomial pmf.
    """
    no_successes = 5
    likelihood_grid = np.zeros((len(theta1_range), len(theta2_range)))
    for x in range(len(theta1_range)):
        for y in range(len(theta2_range)):
            total_likelihood = 0
            theta1 = theta1_range[x]
            theta2 = theta2_range[y]
            for data_point_failures in failures_before_five_successes:
                p = theta1 * theta2 + (1 - theta1) * (1 - theta2)
                log_likelihood = np.log(nbinom.pmf(
                    data_point_failures, no_successes, p))
                total_likelihood += log_likelihood
            likelihood_grid[x, y] = total_likelihood
    return np.exp(likelihood_grid)
def _logpmf(self, x, mu, alpha, p, w): s, p = self.convert_params(mu, alpha, p) return _lazywhere(x != 0, (x, s, p, w), (lambda x, s, p, w: np.log(1. - w) + nbinom.logpmf(x, s, p)), np.log(w + (1. - w) * nbinom.pmf(x, s, p)))
def coverage_probability(self, nr_obs, a, mean_lib, stddev_lib, z, coverage_mean,
                         read_len, s_inner, s_outer, b=None, coverage_model=False):
    '''
    Distribution P(o|c,z) for prior probability over coverage.
    This probability distribution is implemented as a Poisson distribution.

    Attributes:

    c    -- coverage
    mean -- mean value of the Poisson distribution.

    Returns probability P(c)
    '''
    if not b:
        # Only one reference sequence.
        # We split the reference sequence into two equal
        # length sequences to fit the model.
        a = a / 2
        b = a / 2
    param = Param(mean_lib, stddev_lib, coverage_mean, read_len, s_inner, s_outer)
    lambda_ = mean_span_coverage(a, b, z, param)

    if coverage_model == 'Poisson':
        return poisson.pmf(nr_obs, lambda_, loc=0)
    elif coverage_model == 'NegBin':
        p = 0.01
        n = (p * lambda_) / (1 - p)
        return nbinom.pmf(nr_obs, n, p, loc=0)
    else:
        # This is equivalent to uniform coverage
        return 1  # uniform.pdf(nr_obs, loc=lambda_ - 0.3*lambda_, scale=lambda_ + 0.3*lambda_)
def getloglikelihood2(kmat, mu_estimate, alpha, sumup=False, log=True): ''' Get the log likelihood estimation of NB, using the current estimation of beta ''' if kmat.shape[0] != mu_estimate.shape[0]: raise ValueError( 'Count table dimension is not the same as mu vector dimension.') alpha = np.matrix(alpha).reshape(mu_estimate.shape[0], mu_estimate.shape[1]) kmat_r = np.round(kmat) mu_sq = np.multiply(mu_estimate, mu_estimate) var_vec = mu_estimate + np.multiply(alpha, mu_sq) nb_p = np.divide(mu_estimate, var_vec) nb_r = np.divide(mu_sq, var_vec - mu_estimate) if log: logp = nbinom.logpmf(kmat_r, nb_r, nb_p) else: logp = nbinom.pmf(kmat, nb_r, nb_p) if np.isnan(np.sum(logp)): #raise ValueError('nan values for log likelihood!') logp = np.where(np.isnan(logp), 0, logp) if sumup: return np.sum(logp) else: return logp
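# For reference (illustrative values, not from the source): the (mu, alpha)
# parameterization above sets var = mu + alpha * mu**2, p = mu / var and
# r = mu**2 / (var - mu) = 1 / alpha, which moment-matches scipy's nbinom.
import numpy as np
from scipy.stats import nbinom

mu, alpha = 10.0, 0.25
var = mu + alpha * mu ** 2
nb_p = mu / var              # = 1 / (1 + alpha * mu)
nb_r = mu ** 2 / (var - mu)  # = 1 / alpha
assert np.isclose(nbinom.mean(nb_r, nb_p), mu)
assert np.isclose(nbinom.var(nb_r, nb_p), var)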
def NBPara2Arr(param, n):
    """Tabulate the NB(1, 1 - param) (i.e. geometric) pmf over 0..n-1 and renormalize."""
    # print param
    Arr = np.zeros((n, 1))
    for i in range(int(n)):
        Arr[i] = nbinom.pmf(i, 1, 1 - param)  # change failure prob
    Arr *= 1 / np.sum(Arr)
    return Arr
def test_single_squeezed_state_hafnian(self): """Test the sampling routines by comparing the photon number frequencies and the exact probability distribution of a single mode squeezed vacuum state """ n_samples = 1000 mean_n = 1.0 r = np.arcsinh(np.sqrt(mean_n)) sigma = np.array([[np.exp(2 * r), 0.0], [0.0, np.exp(-2 * r)]]) n_cut = 10 samples = hafnian_sample_state(sigma, samples=n_samples, cutoff=n_cut) bins = np.arange(0, max(samples) + 1, 1) (freq, _) = np.histogram(samples, bins=bins) rel_freq = freq / n_samples nm = max(samples) // 2 x = nbinom.pmf(np.arange(0, nm, 1), 0.5, np.tanh(np.arcsinh(np.sqrt(mean_n)))**2) x2 = np.zeros(2 * len(x)) x2[::2] = x rel_freq = freq[0:-1] / n_samples x2 = x2[0:len(rel_freq)] assert np.allclose(rel_freq, x2, atol=rel_tol / np.sqrt(n_samples), rtol=rel_tol / np.sqrt(n_samples))
def _nbinom_pmf(self, value, log=False): if log: return nbinom.logpmf(value, self.no_of_successes, self.prob_of_success) else: return nbinom.pmf(value, self.no_of_successes, self.prob_of_success)
def plot(self, x, n, p):
    pmf = nbinom.pmf(x, n, p)
    plt.plot(x, pmf, 'o-')
    plt.title('Neg_Binomial: n=%i , p=%.2f' % (n, p))
    plt.xlabel('Number of successes')
    plt.ylabel('Probability of Successes')
    plt.show()
def __init__(self, endog, exog=None, missing='none', **kwds): super(CUSTOM_ZNB, self).__init__(endog, exog, missing=missing, **kwds) if exog is None: self.exog = np.ones((self.nobs, 1)) self.nparams = self.exog.shape[1] obs_zp = len([e for e in self.endog if e == 0]) / float(len(self.endog)) pred_zp = nbinom.pmf(0, 2, 2.0 / (2.0 + self.endog.mean())) pred_zp = poisson.pmf(0, self.endog.mean()) additional_zp = obs_zp - pred_zp self.exog_names.append('alpha') if additional_zp > 0.25: init_z_val = np.log((1.0 / additional_zp) - 1) self.start_params = np.hstack( (np.zeros(self.nparams), 0.5, init_z_val)) self.exog_names.append('zi') self.nloglikeobs = self.nloglikeobs_wzp else: self.start_params = np.zeros(self.nparams) self.start_params = np.hstack((np.zeros(self.nparams), 0.5)) self.nloglikeobs = self.nloglikeobs_woz self.start_params[0] = np.log(self.endog.mean()) self.cloneattr = ['start_params']
def dcPF_loglik(yt, link_RQ, l, p):
    N, _ = link_RQ.shape
    nb_pmf = nbinom.pmf(np.arange(N), -l / np.log(1 - p), 1 - p)
    logP = np.log(nb_pmf.dot(link_RQ))
    logP0 = np.log(1 - np.exp(-l))
    loglik = logP - logP0
    res = loglik[yt]
    return res.sum()
def negativebinomial_pmf(self, x, mu, kappa):
    n = kappa
    p = float(kappa) / (kappa + mu)
    negbinomial_pmf = nbinom.pmf(x, n, p)
    return negbinomial_pmf
def get_rad_negbin(self, S, n, p): """Obtain the predicted RAD from a negative binomial distribution""" abundance = list(np.empty([S])) rank = range(1, int(S) + 1) cdf_obs = [(rank[i]-0.5) / S for i in range(0, int(S))] j = 0 cdf_cum = 0 i = 1 while j < S: cdf_cum += nbinom.pmf(i, n, p) / (1 - nbinom.pmf(0, n, p)) while cdf_cum >= cdf_obs[j]: abundance[j] = i j += 1 if j == S: abundance.reverse() return abundance i += 1
def test_pmf(self): n, p = truncatednegbin.convert_params(2, 0.5, 2) nb_logpmf = nbinom.pmf(6, n, p) / nbinom.sf(5, n, p) tnb_pmf = truncatednegbin.pmf(6, 2, 0.5, 2, 5) assert_allclose(nb_logpmf, tnb_pmf, rtol=1e-7) tnb_pmf = truncatednegbin.pmf(5, 2, 0.5, 2, 5) assert_equal(tnb_pmf, 0)
def pmf(self,data,pi=None,lambda_0=None,r=None,p=None,loc=None): pi = pi if pi is not None else self.pi lambda_0 = lambda_0 if lambda_0 is not None else self.lambda_0 r = r if r is not None else self.r p = p if p is not None else self.p loc = loc if loc is not None else 0 return pi*poisson.pmf(data,mu=lambda_0,loc=loc)+(1-pi)*nbinom.pmf(data,n=r,p=1-p,loc=loc)
def loglik(n_arr, t_arr, m, theta): if len(n_arr) != len(t_arr): raise ValueError("Length of arrays should be the same.") ll = 0 for i in range(len(n_arr)): p = t_arr[i] / (t_arr[i] + theta) ll += np.log(nbinom.pmf(n_arr[i], m, p)) return ll
def calc_2X_coverage_threshold(cov_dict): ''' calculate coverage threshold for each key in cov_dict, based on a likelihood ratio between empirical Nbinom(mu,disp) 1X coverage distribution, and a theoretical Poisson(2*mu) 2X coverage distribution. see end of 'alternative parameterization' section of Negative binomial page and scipy negative binomial documentation for details of calculation. choose coverage threshold s.t. log likelihood ratio > 10. ''' ## to convert my IDs to REL IDs. rel_name = {'RM3-130-1':'REL11734','RM3-130-2':'REL11735', 'RM3-130-3':'REL11736','RM3-130-4':'REL11737', 'RM3-130-5':'REL11738','RM3-130-6':'REL11739', 'RM3-130-7':'REL11740','RM3-130-8':'REL11741', 'RM3-130-9':'REL11742','RM3-130-10':'REL11743', 'RM3-130-11':'REL11744','RM3-130-12':'REL11745', 'RM3-130-13':'REL11746','RM3-130-14':'REL11747', 'RM3-130-15':'REL11748','RM3-130-16':'REL11749', 'RM3-130-17':'REL11750','RM3-130-18':'REL11751', 'RM3-130-19':'REL11752','RM3-130-20':'REL11753', 'RM3-130-21':'REL11754','RM3-130-22':'REL11755', 'RM3-130-23':'REL11756','RM3-130-24':'REL11757', 'REL4397':'REL4397', 'REL4398':'REL4398', 'REL288':'REL288','REL291':'REL291','REL296':'REL296','REL298':'REL298'} threshold_dict = {} for g in cov_dict: mean = float(cov_dict[g]['mean']) var = float(cov_dict[g]['variance']) q = (var-mean)/var n = mean**2/(var-mean) p = 1 - q ## assert that I did the math correctly. assert(isclose(nbinom.mean(n,p), mean)) assert(isclose(nbinom.var(n,p), var)) ## find the integer threshold that includes ~95% of REL606 distribution, ## excluding 5% on the left hand side. for x in range(int(mean),int(2*mean)): p0 = nbinom.pmf(x,n,p) p1 = poisson.pmf(x,2*mean) lratio = p1/p0 if lratio > 10: my_threshold = x my_threshold_p0 = p0 my_threshold_p1 = p1 my_lratio = lratio break threshold_dict[rel_name[g]] = {'threshold':str(my_threshold), 'threshold_p0':str(my_threshold_p0), 'threshold_p1':str(my_threshold_p1), 'lratio':str(lratio)} return threshold_dict
def _logpmf(self, x, mu, alpha, p, truncation): size, prob = self.convert_params(mu, alpha, p) pmf = 0 for i in range(int(np.max(truncation)) + 1): pmf += nbinom.pmf(i, size, prob) logpmf_ = nbinom.logpmf(x, size, prob) - np.log(1 - pmf) # logpmf_[x < truncation + 1] = - np.inf return logpmf_
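# The loop above accumulates P(X <= truncation) term by term; an equivalent
# closed form uses the survival function. A quick check with illustrative values
# (not taken from the source):
import numpy as np
from scipy.stats import nbinom

size, prob, truncation = 3.0, 0.4, 2
loop_mass = sum(nbinom.pmf(i, size, prob) for i in range(truncation + 1))
assert np.isclose(np.log(1 - loop_mass), nbinom.logsf(truncation, size, prob))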
def plot_logo(adam_params, file_name, core_length): #assignes each kmer to an index and visa versa kmer_inx = generate_kmer_inx(core_length) inx_kmer = {y:x for x,y in kmer_inx.items()} colnames = [inx_kmer[i] for i in range(len(inx_kmer))] + [inx_kmer[i] for i in range(len(inx_kmer))] + ['sf', 'r', 'p'] + ['LL'] data = pd.DataFrame(adam_params, columns=colnames) core1 = data.sort_values(by='LL').iloc[0,:len(kmer_inx)] core1_probs = energy2prob(core1, top_n=5) core2 = data.sort_values(by='LL').iloc[0,len(kmer_inx):2*len(kmer_inx)] core2_probs = energy2prob(core2, top_n=5) r = data.sort_values(by='LL')['r'].values[0] p = data.sort_values(by='LL')['p'].values[0] sns.set_style("ticks") sns.despine(trim=True) COLOR_SCHEME = {'G': 'orange', 'A': 'red', 'C': 'blue', 'T': 'darkgreen', 'U': 'darkgreen' } _ , (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(4.5, 1.5)) plot_core_logo(core1_probs, ax1, color_scheme=COLOR_SCHEME) plot_core_logo(core2_probs, ax3, color_scheme=COLOR_SCHEME) #plot distance mean = ((1-p)*r)/(p) xx = np.arange(0,int(mean)+8,1) _ = ax2.plot(xx, nbinom.pmf(xx, r, p), 'o--',alpha=0.7, color='black') _ = ax2.set_xlabel('distance') #not show y axis in the plot ax2.set_frame_on(False) _ = ax2.get_yaxis().set_visible(False) xmin, xmax = ax2.get_xaxis().get_view_interval() ymin, ymax = ax2.get_yaxis().get_view_interval() ax2.add_artist(matplotlib.lines.Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2)) _ = ax3.set_yticks(range(0,2)) _ = ax3.set_yticklabels(np.arange(0,2,1)) _ = ax3.get_xaxis().set_visible(False) _ = ax1.set_ylabel('probability') sns.despine(ax=ax2, trim=True) sns.despine(ax=ax3, trim=True) plt.savefig(file_name + '.pdf', bbox_inches='tight') plt.savefig(file_name + '.png', bbox_inches='tight', dpi=150)
def nbinom_pmf_range(lambda_: int, rho: int, bin_id: int): stacked = np.zeros(len(kf_range), dtype=np.float64) lambda_ /= 100 # 2-digit precision rho /= 100 # 2-digit precision n = lambda_ / (rho - 1) p = 1 / rho start, end = bins[bin_id] for i in range(start, end + 1): stacked += nbinom.pmf(kf_range, n * i, p) return stacked
def make_nb_plot(self): self._get_nb_estimate() p = self.nb_prob n = self.nb_size x = np.arange(0, 100) pmf = nbinom.pmf(x, n, p) self.line_neg_binomial, = plt.plot(x, pmf, ls=":", linewidth=2) self.nb_real_kl = entropy(self.probs, pmf) self.nb_grammar_kl = entropy(self.b_prob[:100], pmf[:100])
def log_post_x_star_y_star(self, x_star, y_star): log_sum = 0.0 N = self.N_y_1 if y_star==1 else self.N_y_0 Nx = self.X_sum_y_1 if y_star==1 else self.X_sum_y_0 for col in self.x_cols: log_sum += np.log(nbinom.pmf(x_star[col], Nx[col] + self.a, (N+self.b)/float(N+self.b+1))) # print(self.count) self.count += 1 return log_sum
def plot_nbinom(r, p): left = nbinom.ppf(0.01, r, p) right = nbinom.ppf(0.99, r, p) x = np.arange( left, right, int((right - left) / 10) ) plt.plot( x, nbinom.pmf(x, r, p), alpha=0.6, color='gray' ) plt.plot( x, nbinom.pmf(x, r, p), 'o', label='$r=%s, p = %s$' % (r, p) )
def test_inversion_diffs(self): cfg = AppSettings() reps = 1000 deltas = [] # observed number of differences for _ in range(0, reps): dna = Chromosome() old_seq = dna.sequence dna.inversion() deltas.append( sum(1 for a, b in zip(old_seq, dna.sequence) if a != b)) pmfs = [] expected_deltas = [] # expected differences # Assumes the length of an inversion is drawn from a negative binomial # distribution. Calculates the probability of each length until # 99.99% of the distribution is accounted for. The expected number of # differences for each length is multiplied by the probability of that length # and the sum of that gives the expected differences overall. k = 0 while sum(pmfs) <= 0.9999: pmf = nbinom.pmf(k, 1, (1 - cfg.genetics.mutation_length / (1 + cfg.genetics.mutation_length))) pmfs.append(pmf) diffs = math.floor( k / 2) * (1 - 1 / len(Chromosome.nucleotides())) * 2 expected_deltas.append(pmf * diffs) k += 1 expected_delta = sum(expected_deltas) # Since we are multiplying the binomial distribution (probably of differences at # a given lenght) by a negative binomial distribution (probability of a length) # we must compute the variance of two independent random variables # is Var(X * Y) = var(x) * var(y) + var(x) * mean(y) + mean(x) * var(y) # http://www.odelama.com/data-analysis/Commonly-Used-Math-Formulas/ mean_binom = cfg.genetics.mutation_length var_binom = binom.var(mean_binom, 1 / (len(Chromosome.nucleotides()))) mean_nbinom = cfg.genetics.mutation_length var_nbinom = nbinom.var(cfg.genetics.mutation_length, mean_nbinom / (1 + mean_nbinom)) var = var_binom * var_nbinom + \ var_binom * mean_nbinom + \ mean_binom * var_nbinom observed_delta = sum(deltas) / reps conf_99 = ((var / reps)**(1 / 2)) * 5 assert expected_delta - conf_99 < observed_delta < expected_delta + conf_99
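# For reference: for independent X and Y the product-variance identity is
#   Var(XY) = Var(X)Var(Y) + Var(X)E[Y]**2 + E[X]**2 Var(Y)
# (note the squared means). A quick Monte Carlo check with illustrative values,
# not taken from the source:
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(3.0, 2.0, 1_000_000)
y = rng.normal(5.0, 1.5, 1_000_000)
lhs = np.var(x * y)
rhs = np.var(x) * np.var(y) + np.var(x) * np.mean(y) ** 2 + np.mean(x) ** 2 * np.var(y)
assert np.isclose(lhs, rhs, rtol=0.02)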
def plot_distribution(self):
    """
    Plot the distribution of estimated Coronavirus cases in Dhaka
    """
    p = self.calculate_pro_detected_overseas()
    n = self.international.cases
    fig, ax = plt.subplots(1, 1)
    x = np.arange(nbinom.ppf(0.025, n, p), nbinom.ppf(0.975, n, p))
    ax.vlines(x, 0, nbinom.pmf(x, n, p), color='lightblue', lw=5, alpha=0.5)
    ax.set_title(" pmf of coronavirus cases in Dhaka " + self.date)
def is_bipartite(adam_params, core_length):
    # assigns each kmer to an index and vice versa
    kmer_inx = generate_kmer_inx(core_length)
    inx_kmer = {y: x for x, y in kmer_inx.items()}

    colnames = [inx_kmer[i] for i in range(len(inx_kmer))] + \
               [inx_kmer[i] for i in range(len(inx_kmer))] + ['sf', 'r', 'p'] + ['LL']
    data = pd.DataFrame(adam_params, columns=colnames)

    r = data.sort_values(by='LL')['r'].values[0]
    p = data.sort_values(by='LL')['p'].values[0]

    # Bipartite if a gap of length zero is improbable under the fitted
    # negative binomial distance distribution.
    prob_zero = nbinom.pmf(0, r, p)
    return prob_zero < 0.5
def fit_CRF(cons, resps, nr_c50, nr_expn, nr_gain, nr_base, v_varGain, fit_type): # fit_type (i.e. which loss function): # 1 - least squares # 2 - square root # 3 - poisson # 4 - modulated poisson np = numpy; n_sfs = len(resps); # Evaluate the model loss_by_sf = np.zeros((n_sfs, 1)); for sf in range(n_sfs): all_params = (nr_c50, nr_expn, nr_gain, nr_base); param_ind = [0 if len(i) == 1 else sf for i in all_params]; nr_args = [nr_base[param_ind[3]], nr_gain[param_ind[2]], nr_expn[param_ind[1]], nr_c50[param_ind[0]]]; # evaluate the model pred = naka_rushton(cons[sf], nr_args); # ensure we don't have pred (lambda) = 0 --> log will "blow up" if fit_type == 4: # Get predicted spike count distributions mu = pred; # The predicted mean spike count; respModel[iR] var = mu + (v_varGain * np.power(mu, 2)); # The corresponding variance of the spike count r = np.power(mu, 2) / (var - mu); # The parameters r and p of the negative binomial distribution p = r/(r + mu); # no elif/else if fit_type == 1 or fit_type == 2: # error calculation if fit_type == 1: loss = lambda resp, pred: np.sum(np.power(resp-pred, 2)); # least-squares, for now... if fit_type == 2: loss = lambda resp, pred: np.sum(np.square(np.sqrt(resp) - np.sqrt(pred))); curr_loss = loss(resps[sf], pred); loss_by_sf[sf] = np.sum(curr_loss); else: # if likelihood calculation if fit_type == 3: loss = lambda resp, pred: poisson.logpmf(resp, pred); curr_loss = loss(resps[sf], pred); # already log if fit_type == 4: loss = lambda resp, r, p: np.log(nbinom.pmf(resp, r, p)); # Likelihood for each pass under doubly stochastic model curr_loss = loss(resps[sf], r, p); # already log loss_by_sf[sf] = -np.sum(curr_loss); # negate if LLH return np.sum(loss_by_sf);
def gen_single_mode_dist(s, cutoff=50, N=1): """Generate the photon number distribution of :math:`N` identical single mode squeezed states. Args: s (float): squeezing parameter cutoff (int): Fock cutoff N (float): number of squeezed states Returns: (array): Photon number distribution """ r = 0.5 * N q = 1.0 - np.tanh(s)**2 N = cutoff // 2 ps_tot = np.zeros(cutoff) if cutoff % 2 == 0: ps = nbinom.pmf(np.arange(N), p=q, n=r) ps_tot[0::2] = ps else: ps = nbinom.pmf(np.arange(N + 1), p=q, n=r) ps_tot[0:-1][0::2] = ps[0:-1] ps_tot[-1] = ps[-1] return ps_tot
def computeHscore(states, y, parameters_last, xweights_last, thetaweights_last, t): #xweights is of size Nx x Ntheta (unnormalized); thetaweights is also unnormalized #the ouput compositeHScore is for model comparision, while simpleHScore is for H-based Bayes #the composite score is to evaluate the t-time obs. at the predictive distribution made at time t-1 y0 = int(y[0]) y1 = int(y[1]) Nx = states.shape[0] Ntheta = states.shape[2] #compute the weighting matrix W = zeros((Nx, Ntheta)) thetaweights_last = thetaweights_last / sum( thetaweights_last) #first transform to normalized ones xNormConst = sum(xweights_last, axis=0) for k in range(Ntheta): W[:, k] = thetaweights_last[k] * xweights_last[:, k] / xNormConst[k] #compute the conditional density given a last theta-particle and a last x-particle p = zeros((Nx, Ntheta)) n = zeros((Nx, Ntheta)) for k in range(Nx): p[k, :] = 1 / (1 + parameters_last[0, :] * states[k, 0, :]) p[k, :] = minimum(p[k, :], 1 - 1e-7) n[k, :] = maximum(1, floor(states[k, 0, :] * p[k, :] / (1 - p[k, :]))).astype(int32) # conDensi = zeros((Nx, Ntheta)) score_y0 = average(a=nbinom.pmf(y0, n, p), weights=W) score_y0_p = average(a=nbinom.pmf(y0 + 1, n, p), weights=W) #plus 1 score_y1 = average(a=nbinom.pmf(y1, n, p), weights=W) score_y1_p = average(a=nbinom.pmf(y1 + 1, n, p), weights=W) # print '\n', score_y0_p, score_y0 if y0 == 0: score0 = score_y0_p / score_y0 - 1 + 0.5 * pow( score_y0_p / score_y0 - 1, 2) else: score_y0_m = average(a=nbinom.pmf(y0 - 1, n, p), weights=W) #minus 1 score0 = score_y0_p / score_y0 - score_y0 / score_y0_m + 0.5 * pow( score_y0_p / score_y0 - 1, 2) if y1 == 0: score1 = score_y1_p / score_y1 - 1 + 0.5 * pow( score_y1_p / score_y1 - 1, 2) else: score_y1_m = average(a=nbinom.pmf(y1 - 1, n, p), weights=W) #minus 1 score1 = score_y1_p / score_y1 - score_y1 / score_y1_m + 0.5 * pow( score_y1_p / score_y1 - 1, 2) compositeHScore = score0 + score1 simpleHScore = zeros(1) return {"simpleHScore": simpleHScore, "compositeHScore": compositeHScore}
def gen_ztnegbinom(n, mu, size):
    """Zero-truncated negative binomial distribution.

    input:
        n, int, number of draws to generate
        mu, array of nbinom "number of successes" parameters, one per draw
        size, float, probability of success
    output:
        ztnb, array of int draws from a zero-truncated negative binomial
              distribution
    """
    # Draw uniforms on (P(X=0), 1) and invert the CDF, so every draw is >= 1.
    temp = nbinom.pmf(0, mu, size)
    p = [uniform.rvs(loc=temp[i], scale=1 - temp[i]) for i in range(n)]
    ztnb = [int(nbinom.ppf(p[i], mu[i], size)) for i in range(n)]
    return np.array(ztnb)
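# A usage sketch for gen_ztnegbinom (values are illustrative, not from the source;
# assumes scipy.stats' nbinom and uniform are imported as in the function above):
# each draw gets its own nbinom size parameter from `mu`, shares the success
# probability `size`, and is >= 1 because the uniform variate is restricted to
# (P(X = 0), 1) before the ppf is applied.
import numpy as np

draws = gen_ztnegbinom(n=3, mu=np.array([2.0, 5.0, 1.0]), size=0.3)
assert (draws >= 1).all()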
def log_neg_binom_likelihood(k, r, mu, sd=0):
    if sd == 0:
        offset = 0
        diff = 1
        minR = r
        maxR = r
    else:
        offset = NB_FRAC * sd
        minR = int(r - offset)
        maxR = int(r + offset)
        diff = maxR - minR + 1  # num iterations

    mle = 0
    # Inclusive range of candidate r values
    for r_val in range(minR, maxR + 1, NB_INCR):
        # likelihood that r_val, mu are the true parameters given that you have seen k
        newVal = llh_neg_binom(k, r_val, mu)
        # probability of seeing k given r, p - assuming the prior is a negative binomial
        # with r and mu as the true values in this case
        weight = nbinom.pmf(k, r, mu)
        mle += newVal + weight
    return mle
def getloglikelihood2(kmat,mu_estimate,alpha,sumup=False,log=True): ''' Get the log likelihood estimation of NB, using the current estimation of beta ''' #logmu_est=sk.extended_design_mat * np.matrix(beta_est).getT() # Tracer()() #mu_estimate= np.exp(logmu_est) # these are all N*1 matrix #mu_vec=np.array([t[0] for t in mu_estimate.tolist()]) #k_vec=np.array([round(t[0]) for t in kmat.tolist()]) #if len(mu_vec) != len(k_vec): # raise ValueError('Count table dimension is not the same as mu vector dimension.') # var_vec=mu_vec+alpha*mu_vec*mu_vec # nb_p=[mu_vec[i]/var_vec[i] for i in range(len(mu_vec))] # nb_r=[mu_vec[i]*mu_vec[i]/(var_vec[i]-mu_vec[i]) for i in range(len(mu_vec))] # if log: # logp=np.array([nbinom.logpmf(k_vec[i],nb_r[i],nb_p[i]) for i in range(len(mu_vec))]) #else: # logp=np.array([nbinom.pmf(k_vec[i],nb_r[i],nb_p[i]) for i in range(len(mu_vec))]) if kmat.shape[0] != mu_estimate.shape[0]: raise ValueError('Count table dimension is not the same as mu vector dimension.') kmat_r=np.round(kmat) mu_sq=np.multiply(mu_estimate,mu_estimate) var_vec=mu_estimate+np.multiply(alpha, mu_sq) nb_p=np.divide(mu_estimate,var_vec) nb_r=np.divide(mu_sq,var_vec-mu_estimate) if log: logp=nbinom.logpmf(kmat_r,nb_r,nb_p) else: logp=nbinom.pmf(kmat,nb_r,nb_p) if np.isnan(np.sum(logp)): #raise ValueError('nan values for log likelihood!') logp=np.where(np.isnan(logp),0,logp) if sumup: return np.sum(logp) else: return logp
def getloglikelihood2(kmat,mu_estimate,alpha,sumup=False,log=True): ''' Get the log likelihood estimation of NB, using the current estimation of beta ''' if kmat.shape[0] != mu_estimate.shape[0]: raise ValueError('Count table dimension is not the same as mu vector dimension.') kmat_r=np.round(kmat) mu_sq=np.multiply(mu_estimate,mu_estimate) var_vec=mu_estimate+np.multiply(alpha, mu_sq) nb_p=np.divide(mu_estimate,var_vec) nb_r=np.divide(mu_sq,var_vec-mu_estimate) if log: logp=nbinom.logpmf(kmat_r,nb_r,nb_p) else: logp=nbinom.pmf(kmat,nb_r,nb_p) if np.isnan(np.sum(logp)): #raise ValueError('nan values for log likelihood!') logp=np.where(np.isnan(logp),0,logp) if sumup: return np.sum(logp) else: return logp
def getloglikelihood2(k_list,mu_list,alpha,sumup=False,log=True): ''' Get the log likelihood estimation of NB, using the current estimation of beta and alpha ''' # solution 1 mu_sq=np.multiply(mu_list,mu_list) var_vec=mu_list+np.multiply(alpha, mu_sq) nb_p=np.divide(mu_list,var_vec) nb_r=np.divide(mu_sq,var_vec-mu_list) if log: logp=nbinom.logpmf(k_list,nb_r,nb_p) else: logp=nbinom.pmf(k_list,nb_r,nb_p) if np.isnan(np.sum(logp)): logp=np.where(np.isnan(logp),0,logp) #print("hi",np.sum(logp)) if sumup: #print(np.sum(logp)) return np.sum(logp) else: #pass return logp
def get_nb(self): p = 0.200086480861 n = 4.88137405883 x = np.arange(0, self.distrib_len) self.nb_pmf = nbinom.pmf(x, n, p)
def _ppf(self, q, n, p): return nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
def test_pmf_p2(self): n, p = sm.distributions.zinegbin.convert_params(30, 0.1, 2) nb_pmf = nbinom.pmf(100, n, p) tnb_pmf = sm.distributions.zinegbin.pmf(100, 30, 0.1, 2, 0.01) assert_allclose(nb_pmf, tnb_pmf, rtol=1e-5, atol=1e-5)
def _rvs(self, n, p): return nbinom.ppf(uniform(low=nbinom.pmf(0, n, p)), n, p)
def _pmf(self, x, n, p): if x == 0: return 0.0 else: return nbinom.pmf(x, n, p) / nbinom.sf(0, n, p)
def _cdf(self, x, n, p): k = floor(x) if k == 0: return 0.0 else: return (nbinom.cdf(x, n, p) - nbinom.pmf(0, n, p)) / nbinom.sf(0, n, p)
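# The _pmf/_cdf pair above renormalizes the negative binomial by
# nbinom.sf(0, n, p) = 1 - P(X = 0) to obtain the zero-truncated law.
# A standalone check with illustrative parameters (not from the source):
import numpy as np
from scipy.stats import nbinom

n, p = 3.0, 0.4
k = np.arange(1, 400)
ztnb_pmf = nbinom.pmf(k, n, p) / nbinom.sf(0, n, p)  # P(X = k | X > 0)
assert np.isclose(ztnb_pmf.sum(), 1.0)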
def test_pmf(self): n, p = sm.distributions.zinegbin.convert_params(1, 0.9, 1) nb_logpmf = nbinom.pmf(2, n, p) tnb_pmf = sm.distributions.zinegbin.pmf(2, 1, 0.9, 2, 0.5) assert_allclose(nb_logpmf, tnb_pmf * 2, rtol=1e-7)