def test_betabinom(self):
    """Smoke-test ``scipy.stats.betabinom`` by plotting its pmf.

    Evaluates the moments, the percent-point function and the pmf (both
    through the distribution object and through a frozen instance), draws
    them on a single Matplotlib axes, and checks the textual description
    of that axes.  The figure is closed afterwards so repeated test runs
    do not accumulate open figures.
    """
    from scipy.stats import betabinom
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    try:
        n, a, b = 5, 2.3, 0.63
        # The moments are not asserted on; the call only verifies that
        # ``stats`` accepts these parameters without raising.
        mean, var, skew, kurt = betabinom.stats(n, a, b, moments='mvsk')

        # Support points between the 1st and 99th percentiles.
        x = np.arange(betabinom.ppf(0.01, n, a, b),
                      betabinom.ppf(0.99, n, a, b))
        ax.plot(x, betabinom.pmf(x, n, a, b), 'bo', ms=8,
                label='betabinom pmf')
        ax.vlines(x, 0, betabinom.pmf(x, n, a, b), colors='b', lw=5,
                  alpha=0.5)

        # Same pmf, evaluated through a frozen distribution.
        rv = betabinom(n, a, b)
        ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
                  label='frozen pmf')
        ax.legend(loc='best', frameon=False)

        # Older Matplotlib describes a subplot axes as "AxesSubplot(...)",
        # newer releases as "Axes(...)"; the geometry is what matters here,
        # so both spellings are accepted.
        self.assertIn(
            str(ax),
            (
                "AxesSubplot(0.125,0.11;0.775x0.77)",
                "Axes(0.125,0.11;0.775x0.77)",
            ),
        )
    finally:
        # Release the figure even when an assertion or a plotting call fails.
        plt.close(fig)
def __init__(
    self,
    query_dim,
    attn_dim,
    static_channels=8,
    static_kernel_size=21,
    dynamic_channels=8,
    dynamic_kernel_size=21,
    prior_length=11,
    alpha=0.1,
    beta=0.9,
    score_mask_value=-float("inf"),
):
    """Create the projection layers and the fixed beta-binomial prior.

    Args:
        query_dim: Input size of the ``W`` linear layer; also forwarded to
            the parent constructor.
        attn_dim: Output size of ``W``, ``U`` and ``T`` and input size of
            ``V`` and ``v``; also forwarded to the parent constructor.
        static_channels: Number of output channels of the 1-D conv ``F``.
        static_kernel_size: Kernel size of ``F``; the conv is padded with
            ``(static_kernel_size - 1) // 2`` on each side.
        dynamic_channels: Together with ``dynamic_kernel_size`` sets the
            output size of ``V`` (their product) and the input size of ``T``.
        dynamic_kernel_size: See ``dynamic_channels``.
        prior_length: Number of entries in the prior buffer ``P``.
        alpha: First shape parameter of the beta-binomial prior.
        beta: Second shape parameter of the beta-binomial prior.
        score_mask_value: Forwarded unchanged to the parent constructor.
    """
    super(DynamicConvolutionAttention, self).__init__(
        query_dim, attn_dim, score_mask_value
    )

    self.prior_length = prior_length
    self.dynamic_channels = dynamic_channels
    self.dynamic_kernel_size = dynamic_kernel_size

    # Beta-binomial pmf over 0..prior_length-1, stored reversed as a
    # non-trainable buffer.
    prior_support = np.arange(prior_length)
    prior_pmf = betabinom.pmf(prior_support, prior_length - 1, alpha, beta)
    self.register_buffer("P", torch.FloatTensor(prior_pmf).flip(0))

    # Layers are created in the same order as before so that parameter
    # registration order (and seeded initialisation) is unchanged.
    dynamic_filter_size = dynamic_channels * dynamic_kernel_size
    static_padding = (static_kernel_size - 1) // 2

    self.W = nn.Linear(query_dim, attn_dim)
    self.V = nn.Linear(attn_dim, dynamic_filter_size, bias=False)
    self.F = nn.Conv1d(
        1,
        static_channels,
        static_kernel_size,
        padding=static_padding,
        bias=False,
    )
    self.U = nn.Linear(static_channels, attn_dim, bias=False)
    self.T = nn.Linear(dynamic_channels, attn_dim)
    self.v = nn.Linear(attn_dim, 1, bias=False)
def add_likelihood_info_bayes(nline, a, b):
    '''
    Append per-alternate-allele frequency and likelihood entries to the INFO
    column of one VCF record line and return the rebuilt, tab-joined line.

    The line is split on whitespace into exactly eight columns. The INFO
    column must start with "DP=<depth>" followed by "AF=<f1>,<f2>,...".
    For every alternate allele an entry ";<ALT>=f:<freq>,l:<likelihood>" is
    appended, where
      * likelihood is the beta-binomial pmf of the alt count given depth, a, b
      * freq is the Bayesian beta posterior mean (x + a) / (n + a + b).
    The values are uncorrected for multiple testing.

    nline: one VCF record line (trailing newline allowed).
    a, b: beta prior shape parameters.

    If the number of alternates and the number of AF values disagree,
    'Mismatch' is printed and only the pairs that exist on both sides are
    annotated.
    '''
    chro, pos, rid, ref, alt, qual, rfil, info = nline.strip().split()
    altset = alt.split(',')

    # Need the depth and the alt allele frequencies from INFO.
    iv = info.split(';')
    dp_field, af_field = iv[0], iv[1]
    # Remove the key as a prefix. str.strip("DP=") would strip any of the
    # characters 'D', 'P', '=' from both ends, which is not what is meant.
    if dp_field.startswith('DP='):
        dp_field = dp_field[len('DP='):]
    if af_field.startswith('AF='):
        af_field = af_field[len('AF='):]
    dp = int(dp_field)
    arats = [float(v) for v in af_field.split(',')]

    if len(altset) != len(arats):
        print('Mismatch', nline)

    entries = []
    # zip() keeps the loop inside the shorter list, so a mismatch that was
    # just reported cannot turn into an IndexError.
    for alt_allele, ar in zip(altset, arats):
        # Should be nearly an integer already; round() absorbs floating
        # point noise in ar * dp.
        alt_count = round(ar * dp)
        likelihood = betabinom.pmf(n=dp, a=a, b=b, k=alt_count)
        # Bayesian beta posterior mean: phat = (x + a) / (n + a + b)
        lf = (alt_count + a) / (dp + a + b)
        # Example: one alternate 'A' adds ";A=f:0.003,l:0.000001".
        entries.append(';' + alt_allele + '=f:' + str(lf) + ',l:' + str(likelihood))

    return '\t'.join([chro, pos, rid, ref, alt, qual, rfil, info + ''.join(entries)])
def add_likelihood_info_mle(nline, a, b):
    '''
    Append per-alternate-allele frequency and likelihood entries to the INFO
    column of one VCF record line and return the rebuilt, tab-joined line.

    The line is split on whitespace into exactly eight columns. The INFO
    column must start with "DP=<depth>" followed by "AF=<f1>,<f2>,...".
    For every alternate allele an entry ";<ALT>=f:<freq>,l:<likelihood>" is
    appended, where
      * likelihood is the beta-binomial pmf of the alt count given depth, a, b
      * freq is found numerically by minimising get_fp with scipy's fmin,
        starting from 0.01.
    The values are uncorrected for multiple testing.

    nline: one VCF record line (trailing newline allowed).
    a, b: beta-binomial shape parameters.

    Optimisation results are memoised in the module-level dict ldf, keyed by
    (alt_count, depth). If the number of alternates and the number of AF
    values disagree, 'Mismatch' is printed and only the pairs that exist on
    both sides are annotated.
    '''
    chro, pos, rid, ref, alt, qual, rfil, info = nline.strip().split()
    altset = alt.split(',')

    # Need the depth and the alt allele frequencies from INFO.
    iv = info.split(';')
    dp_field, af_field = iv[0], iv[1]
    # Remove the key as a prefix. str.strip("DP=") would strip any of the
    # characters 'D', 'P', '=' from both ends, which is not what is meant.
    if dp_field.startswith('DP='):
        dp_field = dp_field[len('DP='):]
    if af_field.startswith('AF='):
        af_field = af_field[len('AF='):]
    dp = int(dp_field)
    arats = [float(v) for v in af_field.split(',')]

    if len(altset) != len(arats):
        print('Mismatch', nline)

    entries = []
    # zip() keeps the loop inside the shorter list, so a mismatch that was
    # just reported cannot turn into an IndexError.
    for alt_allele, ar in zip(altset, arats):
        alt_count = round(ar * dp)  # should always be (nearly) an integer
        likelihood = betabinom.pmf(n=dp, a=a, b=b, k=alt_count)

        # Memoise the optimiser result to save on runtime.
        # NOTE(review): the cache key does not include a and b, so calls with
        # different shape parameters share entries -- confirm this is intended.
        cache_key = (alt_count, dp)
        if cache_key in ldf:
            lf = ldf[cache_key]
        else:
            # fmin returns a 1-element array. Take the scalar right away so
            # the text written on a cache miss ("f:0.01") matches the text
            # written on a cache hit instead of being "f:[0.01]".
            lf = fmin(func=get_fp, x0=.01, args=(a, b, dp, alt_count), disp=False)[0]
            ldf[cache_key] = lf

        # Example: one alternate 'A' adds ";A=f:0.003,l:0.000001".
        entries.append(';' + alt_allele + '=f:' + str(lf) + ',l:' + str(likelihood))

    return '\t'.join([chro, pos, rid, ref, alt, qual, rfil, info + ''.join(entries)])
def computeBetaWeights(self, k=.6):
    """Return beta-binomial weights indexed by the option chain's strikes.

    The strike list is fetched once from ``self.optionChain.getStrikeList()``
    and used both for the number of weights and for the index, so the two
    can never disagree.

    Args:
        k: Value used for both shape parameters of the beta-binomial
            distribution (a symmetric distribution).

    Returns:
        A ``pandas.Series`` with one weight per strike: the beta-binomial
        pmf at positions ``0 .. n-1`` with ``n - 1`` trials, where ``n`` is
        the number of strikes.
    """
    strikeList = self.optionChain.getStrikeList()
    n = len(strikeList)
    weights = betabinom.pmf(np.arange(n), n - 1, k, k)
    return pd.Series(weights, index=strikeList)
def _compute_posterior_class_probability(self, k, n) -> Tuple[float, float]: """ For given parameters of beta distributions for both classes, computes the posterior class probabilities: .. math:: p(c' = x | n', k')= \\binom{n'}{k'} \\frac{B(k'+\\alpha_x, n' - k' + \\beta_x)}{B(\\alpha_x, \\beta_x)} \\frac{N_x + 1}{N + 2}, x=0,1 Arguments: k: number of disease-associated sequences n: total number of sequences Returns: a tuple of probabilities for negative class and positive class for given example, normalized to sum to 1 """ predicted_probability_0 = beta_binomial.pmf( k, n, self.alpha_0, self.beta_0) * (self.N_0 + 1) / (self.N_0 + self.N_1 + 2) predicted_probability_1 = beta_binomial.pmf( k, n, self.alpha_1, self.beta_1) * (self.N_1 + 1) / (self.N_0 + self.N_1 + 2) normalization_const = predicted_probability_0 + predicted_probability_1 if np.isnan(normalization_const): raise ValueError( f"{ProbabilisticBinaryClassifier.__name__}: encountered nan in predicted posterior class probabilities." f"\nprobability of class 0: {predicted_probability_0}\nprobability of class 1: {predicted_probability_1}\n" f"alpha 0: {self.alpha_0}, beta 0: {self.beta_0}\nalpha 1: {self.alpha_1}, beta 1: {self.beta_1}\n" f"positive example count: {self.N_1}, negative example count: {self.N_0}" ) elif normalization_const == 0: warnings.warn( f"{ProbabilisticBinaryClassifier.__name__}: posterior class probabilities for both classes are 0 (k={k}, n={n}). Returning " f"normalized values to indicate that the example could not be classified, by setting both probabilities to 0.5.", RuntimeWarning) return 0.5, 0.5 return predicted_probability_0 / normalization_const, predicted_probability_1 / normalization_const
def iter_threshold(
    obs_mismatches,
    obs_positions,
    unobs_positions,
    alpha,
    beta,
    d_half,
    threshold_fcn=soft_species_probability,
):
    """Yield one tuple per candidate number of unobserved mismatches.

    Walks ``mm = 0 .. unobs_positions`` and, for each value, yields
    ``(mm, p_mm, d, p_species)`` where

    * ``p_mm`` is ``betabinom.pmf(mm, unobs_positions, alpha, beta)``,
    * ``d`` is ``pctdiff(obs_mismatches, obs_positions, mm, unobs_positions)``,
    * ``p_species`` is ``threshold_fcn(d, d_half)``.

    Iteration stops, without yielding that step, the first time
    ``p_species`` falls below ``1e-10``.
    """
    for mm in range(unobs_positions + 1):
        mismatch_prob = betabinom.pmf(mm, unobs_positions, alpha, beta)
        divergence = pctdiff(obs_mismatches, obs_positions, mm, unobs_positions)
        species_prob = threshold_fcn(divergence, d_half)

        # Stop at the first step whose threshold value is negligible.
        if species_prob < 1e-10:
            return

        yield (mm, mismatch_prob, divergence, species_prob)
def __init__(
    self,
    query_dim,
    embedding_dim,  # pylint: disable=unused-argument
    attention_dim,
    static_filter_dim,
    static_kernel_size,
    dynamic_filter_dim,
    dynamic_kernel_size,
    prior_filter_len=11,
    alpha=0.1,
    beta=0.9,
):
    """Create the attention layers and the fixed beta-binomial prior filter.

    Args:
        query_dim: Input size of ``query_layer``.
        embedding_dim: Accepted but not used by this constructor.
        attention_dim: Output size of ``query_layer``, ``static_filter_layer``
            and ``dynamic_filter_layer``; input size of ``key_layer`` and ``v``.
        static_filter_dim: Number of output channels of ``static_filter_conv``.
        static_kernel_size: Kernel size of ``static_filter_conv``; the conv is
            padded with ``(static_kernel_size - 1) // 2`` on each side.
        dynamic_filter_dim: Together with ``dynamic_kernel_size`` sets the
            output size of ``key_layer`` (their product).
        dynamic_kernel_size: See ``dynamic_filter_dim``.
        prior_filter_len: Number of entries in the ``prior`` buffer.
        alpha: First shape parameter of the beta-binomial prior.
        beta: Second shape parameter of the beta-binomial prior.
    """
    super().__init__()

    self._mask_value = 1e-8
    self.dynamic_filter_dim = dynamic_filter_dim
    self.dynamic_kernel_size = dynamic_kernel_size
    self.prior_filter_len = prior_filter_len
    self.attention_weights = None

    # Layers are created in the same order as before so that parameter
    # registration order (and seeded initialisation) is unchanged.
    dynamic_filter_size = dynamic_filter_dim * dynamic_kernel_size
    static_padding = (static_kernel_size - 1) // 2

    self.query_layer = nn.Linear(query_dim, attention_dim)
    self.key_layer = nn.Linear(attention_dim, dynamic_filter_size, bias=False)
    self.static_filter_conv = nn.Conv1d(
        1,
        static_filter_dim,
        static_kernel_size,
        padding=static_padding,
        bias=False,
    )
    self.static_filter_layer = nn.Linear(static_filter_dim, attention_dim, bias=False)
    self.dynamic_filter_layer = nn.Linear(dynamic_filter_dim, attention_dim)
    self.v = nn.Linear(attention_dim, 1, bias=False)

    # Beta-binomial pmf over 0..prior_filter_len-1, stored reversed as a
    # non-trainable buffer.
    prior_pmf = betabinom.pmf(range(prior_filter_len), prior_filter_len - 1, alpha, beta)
    self.register_buffer("prior", torch.FloatTensor(prior_pmf).flip(0))
def __init__(
    self,
    attn_rnn_size,
    hidden_size,
    static_channels,
    static_kernel_size,
    dynamic_channels,
    dynamic_kernel_size,
    prior_length,
    alpha,
    beta,
):
    """Create the projection layers and the fixed beta-binomial prior.

    Args:
        attn_rnn_size: Input size of the ``W`` linear layer.
        hidden_size: Output size of ``W``, ``U`` and ``T`` and input size of
            ``V`` and ``v``.
        static_channels: Number of output channels of the 1-D conv ``F``.
        static_kernel_size: Kernel size of ``F``; the conv is padded with
            ``(static_kernel_size - 1) // 2`` on each side.
        dynamic_channels: Together with ``dynamic_kernel_size`` sets the
            output size of ``V`` (their product) and the input size of ``T``.
        dynamic_kernel_size: See ``dynamic_channels``.
        prior_length: Number of entries in the prior buffer ``P``.
        alpha: First shape parameter of the beta-binomial prior.
        beta: Second shape parameter of the beta-binomial prior.
    """
    super(DynamicConvolutionAttention, self).__init__()

    self.prior_length = prior_length
    self.dynamic_channels = dynamic_channels
    self.dynamic_kernel_size = dynamic_kernel_size

    # Beta-binomial pmf over 0..prior_length-1, stored reversed as a
    # non-trainable buffer.
    prior_support = np.arange(prior_length)
    prior_pmf = betabinom.pmf(prior_support, prior_length - 1, alpha, beta)
    self.register_buffer("P", torch.FloatTensor(prior_pmf).flip(0))

    # Layers are created in the same order as before so that parameter
    # registration order (and seeded initialisation) is unchanged.
    dynamic_filter_size = dynamic_channels * dynamic_kernel_size
    static_padding = (static_kernel_size - 1) // 2

    self.W = nn.Linear(attn_rnn_size, hidden_size)
    self.V = nn.Linear(hidden_size, dynamic_filter_size, bias=False)
    self.F = nn.Conv1d(
        1,
        static_channels,
        static_kernel_size,
        padding=static_padding,
        bias=False,
    )
    self.U = nn.Linear(static_channels, hidden_size, bias=False)
    self.T = nn.Linear(dynamic_channels, hidden_size)
    self.v = nn.Linear(hidden_size, 1, bias=False)
def ppr(k, N0, a, b, t, kt):
    """Return the ratio of two beta-binomial pmf values.

    The numerator is ``betabinom.pmf(k, N0, a, b)``; the denominator is
    ``betabinom.pmf(k - kt, N0 - t, a + kt, b + t - kt)``.  NumPy
    divide-by-zero and overflow warnings are silenced while both are
    evaluated and divided, so a zero denominator produces ``inf``/``nan``
    rather than a warning.

    Requires ``kt <= t`` (checked with ``assert``).
    """
    assert kt <= t

    with np.errstate(divide='ignore', over='ignore'):
        numerator = betabinom.pmf(k, N0, a, b)
        denominator = betabinom.pmf(k - kt, N0 - t, a + kt, b + t - kt)
        ratio = numerator / denominator

    return ratio
def pdf(self, x: float) -> float:
    """Return the beta-binomial pmf at ``int(x)`` as a Python float.

    ``x`` is truncated with ``int`` before evaluation; the distribution is
    parameterised by ``self.n``, ``self.alpha`` and ``self.beta``.
    """
    pmf_value = betabinom.pmf(int(x), self.n, self.alpha, self.beta)
    return float(pmf_value)
ax2 = fig.add_subplot(212)

# Beta distribution plot
x_beta = np.linspace(beta.ppf(0, a, b), beta.ppf(1, a, b), size)
x_betabinom = np.arange(betabinom.ppf(0, n, a, b), betabinom.ppf(1, n, a, b)) + 1
ax1.plot(
    x_beta,
    beta.pdf(x_beta, a, b),
    color="blue",
    label=f"Beta : a = {a}, b = {b}",
)
ax1.legend(loc='upper center', fontsize=30)
ax1.tick_params(labelsize=24)

# Beta-binomial distribution plot (line through the pmf plus vertical bars)
betabinom_pmf = betabinom.pmf(x_betabinom, n, a, b)
ax2.plot(
    x_betabinom,
    betabinom_pmf,
    color="red",
    alpha=0.5,
    label=f"Beta Binomial : a = {a}, b = {b}",
)
ax2.vlines(x_betabinom, 0, betabinom_pmf, lw=16, alpha=0.55, color="red")
ax2.legend(loc="upper center", fontsize=30)
ax2.tick_params(labelsize=24)

fig.tight_layout()

# Visualization
st.subheader("Visualization of the probability functions")