def binom_conf_interval(n, x, cl=0.975, alternative="two-sided", p=None,
                        **kwargs):
    """
    Compute a confidence interval for a binomial p, the probability of
    success in each trial.

    Parameters
    ----------
    n : int
        The number of Bernoulli trials.
    x : int
        The number of successes.
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    p : float in (0, 1)
        Starting point in search for confidence bounds for probability of
        success in each trial.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (approximately)
        1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    from scipy.optimize import brentq
    from scipy.stats import binom
    assert alternative in ("two-sided", "lower", "upper")

    if p is None:
        p = x / n
    ci_low = 0.0
    ci_upp = 1.0

    if alternative == 'two-sided':
        cl = 1 - (1 - cl) / 2

    if alternative != "upper" and x > 0:
        f = lambda q: cl - binom.cdf(x - 1, n, q)
        # pass kwargs through as keyword arguments (xtol, rtol, maxiter)
        ci_low = brentq(f, 0.0, p, **kwargs)
    if alternative != "lower" and x < n:
        f = lambda q: binom.cdf(x, n, q) - (1 - cl)
        # bracket must run from the starting point up to 1
        ci_upp = brentq(f, p, 1.0, **kwargs)

    return ci_low, ci_upp
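# Hedged sanity check for binom_conf_interval above: the root-finding bounds
# should agree with the closed-form Clopper-Pearson interval obtained from
# beta quantiles. The example values (n=100, x=37) and the tolerance are
# illustrative choices, not from the original source.
from scipy.stats import beta

n, x, cl = 100, 37, 0.95
lo, hi = binom_conf_interval(n, x, cl=cl)
alpha = 1 - cl
lo_cp = beta.ppf(alpha / 2, x, n - x + 1)      # Clopper-Pearson lower bound
hi_cp = beta.ppf(1 - alpha / 2, x + 1, n - x)  # Clopper-Pearson upper bound
assert abs(lo - lo_cp) < 1e-6 and abs(hi - hi_cp) < 1e-6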
def binom_test_v2(x, n=None, p=0.5, alternative='two-sided'):
    n = np.int_(n)
    if (p > 1.0) or (p < 0.0):
        raise ValueError("p must be in range [0, 1]")
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError(
            "alternative not recognized; should be 'two-sided', 'less' or "
            "'greater'")
    if alternative == 'less':
        return binom.cdf(x, n, p)
    if alternative == 'greater':
        return binom.sf(x - 1, n, p)

    # Two-sided: sum the probabilities of all outcomes at most as likely as
    # the observed count (up to a small relative tolerance), as in R's
    # binom.test and SciPy's exact test.
    d = binom.pmf(x, n, p)
    rerr = 1 + 1e-7
    a_fn = lambda x1: binom.pmf(x1, n, p)
    if x == p * n:
        pval = 1.
    elif x < p * n:
        # locate the matching cutoff in the upper tail
        y = n - binary_search(a_fn, d * rerr, np.ceil(p * n), n) + 1
        pval = binom.cdf(x, n, p) + binom.sf(n - y, n, p)
    else:
        # locate the matching cutoff in the lower tail
        y = binary_search(a_fn, d * rerr, 0, np.floor(p * n) + 1, True) + 1
        pval = binom.cdf(y - 1, n, p) + binom.sf(x - 1, n, p)
    return min(1.0, pval)
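# Hedged cross-check for binom_test_v2 above: given the binary_search helper
# the snippet assumes available, the two-sided p-value should match SciPy's
# exact test (scipy >= 1.7 provides binomtest). Inputs are illustrative.
from scipy.stats import binomtest

print(binom_test_v2(7, n=20, p=0.3))  # two-sided, via the snippet
print(binomtest(7, 20, 0.3).pvalue)   # reference value from SciPy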
def Gupper(theta, y, n, j):
    # Recursive tail probability across a sequence of binomial stages:
    # either finish strictly below y[j] at stage j, or hit y[j] exactly
    # and continue to stage j + 1.
    y = np.asarray(y)
    n = np.asarray(n)
    if j == len(y) - 1:
        return binom.cdf(k=y[j], n=n[j], p=theta)
    return (binom.cdf(k=y[j] - 1, n=n[j], p=theta)
            + binom.pmf(k=y[j], n=n[j], p=theta)
            * Gupper(theta=theta, y=y, n=n, j=j + 1))
def quantile_interval(level, n, p):
    """
    Calculate quantiles for confidence interval endpoints.

    Example: quantile_interval(0.90, 500, 0.90) gives the lower and upper
    endpoints (indices) for the 90th percentile of a probability
    distribution based on a sample of size 500.

    :param level: (float) confidence interval level between 0 and 1
    :param n: (int) sample size
    :param p: (float) percentile of interest between 0 and 1
    """
    # TODO: need to check corner cases
    l = int(n * p)
    r = l
    dens = 0
    while dens < level:
        # grow the interval one order statistic at a time, on whichever
        # side carries more probability mass
        prob_l = binom.pmf(l - 1, n, p)
        prob_r = binom.pmf(r + 1, n, p)
        if prob_l > prob_r:
            l -= 1
            if l < 0:
                l = 0
                break
        else:
            r += 1
            if r >= n:
                r = n - 1
                break
        dens = binom.cdf(r, n, p) - binom.cdf(l, n, p)
    return l, r
def CDF_error_analytic_bootstrap(n, target_quantile, quantile_quantile):
    target_count = int(target_quantile * float(n))
    # Start off with a binary search
    small_ind = 0
    big_ind = n - 1
    small_prob = 1 - binom.cdf(target_count, n, 0)
    big_prob = 1 - binom.cdf(target_count, n, float(big_ind) / float(n))
    while big_ind - small_ind > 4:
        mid_ind = (big_ind + small_ind) // 2  # integer midpoint
        mid_prob = 1 - binom.cdf(target_count, n, float(mid_ind) / float(n))
        if mid_prob > quantile_quantile:
            big_prob = mid_prob
            big_ind = mid_ind
        else:
            small_prob = mid_prob
            small_ind = mid_ind
    # Finish it off with a linear search
    prob_closest = -100
    for p_num in range(small_ind, big_ind + 1):
        p = float(p_num) / float(n)
        coCDF_prob = 1 - binom.cdf(target_count, n, p)
        if abs(coCDF_prob - quantile_quantile) < abs(prob_closest - quantile_quantile):
            prob_closest = coCDF_prob
            prob_index = p_num
    return prob_index
def calc_probabilities(prevalence_per_100k=1, days_of_no_transmission_threshold=28,
                       num_tests=4, r0=1, generation_interval=4.7,
                       high_prev_pop_rel_likelihood=1,
                       high_prev_testing_proportion=.1):
    prevalence_per_100k = simple_exponential_growth(
        initial_population=prevalence_per_100k, r_eff=r0,
        num_days=days_of_no_transmission_threshold,
        generation_interval=generation_interval)
    prevalence_per_100k = list(prevalence_per_100k)
    num_in_10k_list, num_in_90k_list = population_split(
        prevalence_per_100k, high_prev_pop_rel_likelihood)
    # binom.cdf(0, ...) is the chance of zero detections on a given day, so
    # one minus the product is the chance of at least one detection overall.
    pr_detect_10k = 1 - np.prod([
        binom.cdf(0, high_prev_testing_proportion * num_tests * 1000,
                  current_prev / 10000) for current_prev in num_in_10k_list
    ])
    pr_detect_90k = 1 - np.prod([
        binom.cdf(0, (1 - high_prev_testing_proportion) * num_tests * 1000,
                  current_prev / 90000) for current_prev in num_in_90k_list
    ])
    pr_detect = 1 - (1 - pr_detect_10k) * (1 - pr_detect_90k)
    return pr_detect
def expctd_cond_leq_m(m, n, p):
    # E[X | X <= m] for X ~ Binomial(n, p).
    if m <= int(n / 2):
        k = np.arange(m + 1)
        return np.sum(binom.pmf(k, n, p) / binom.cdf(m, n, p) * k)
    # For large m, use E[X] = E[X | X <= m] P(X <= m) + E[X | X > m] P(X > m)
    # and the companion expctd_cond_gr_m for the upper tail.
    return (n * p / binom.cdf(m, n, p)
            - binom.sf(m, n, p) / binom.cdf(m, n, p) * expctd_cond_gr_m(m, n, p))
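# Consistency sketch for expctd_cond_leq_m above, exercising the m > n/2
# branch (which assumes the companion expctd_cond_gr_m is defined): the
# result should equal the direct truncated-mean computation. All inputs
# are illustrative.
import numpy as np
from scipy.stats import binom

m, n, p = 12, 20, 0.3
k = np.arange(m + 1)
direct = np.sum(k * binom.pmf(k, n, p)) / binom.cdf(m, n, p)
print(direct, expctd_cond_leq_m(m, n, p))  # should agree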
def LogLikelihoodNgramsMultiple(Bow_unique, Bow_Ngrams, Bow_ngrams, N):
    """
    Log likelihood of having the last word of the N-gram given the previous
    sequence, e.g., for the sequence [W1, W2, W3, W4], what is the
    probability of having W4 after the sequence W1, W2, W3.

    :param Bow_unique: individual word frequencies
    :param Bow_Ngrams: N-gram frequencies
    :param Bow_ngrams: (N-1)-gram frequencies
    :param N: total number of N-grams
    :return: probabilities
    """
    Prob = {}
    for N_gram in Bow_Ngrams:
        individual_words = N_gram.split(" ")
        count_w1 = Bow_ngrams[" ".join(word for word in individual_words[:-1])]
        count_w2 = Bow_unique[individual_words[-1]]
        count_w12 = Bow_Ngrams[N_gram]
        p = count_w2 / N
        p1 = count_w12 / count_w1
        p2 = (count_w2 - count_w12) / (N - count_w1)
        # Individual binomial probabilities: the first pair uses the null
        # hypothesis rate p, the second pair the alternative rates p1 and p2
        # (the original computed pbinom4 with p, which made the ratio's
        # denominator inconsistent with the alternative hypothesis).
        pbinom1 = binom.cdf(count_w12, count_w1, p)
        pbinom2 = binom.cdf(count_w2 - count_w12, N - count_w1, p)
        pbinom3 = binom.cdf(count_w12, count_w1, p1)
        pbinom4 = binom.cdf(count_w2 - count_w12, N - count_w1, p2)
        # Log likelihood ratio
        LL_i = np.log((pbinom1 * pbinom2) / (pbinom3 * pbinom4))
        Prob[N_gram] = {"LL": LL_i, "p1": p1, "p2": p2, "p": p}
    return Prob
def logLikelihoodNgrams(BOW_unique, BOW_grams, N):
    """
    Calculates the log likelihood probabilities for each n-gram.

    :param BOW_unique: dictionary with each word and the corresponding counts
    :param BOW_grams: dictionary with each n-gram and the corresponding counts
    :param N: total number of n-grams
    :return: dictionary with probabilities for each n-gram
    """
    Prob = {}
    for n_gram in BOW_grams:
        count_w1 = BOW_unique[n_gram.split(" ")[0]]
        count_w2 = BOW_unique[n_gram.split(" ")[1]]
        count_w12 = BOW_grams[n_gram]
        p = count_w2 / N
        p1 = count_w12 / count_w1
        p2 = (count_w2 - count_w12) / (N - count_w1)
        # Individual binomial probabilities: the first pair uses the null
        # hypothesis rate p, the second pair the alternative rates p1 and p2
        # (the original computed pbinom4 with p, which made the ratio's
        # denominator inconsistent with the alternative hypothesis).
        pbinom1 = binom.cdf(count_w12, count_w1, p)
        pbinom2 = binom.cdf(count_w2 - count_w12, N - count_w1, p)
        pbinom3 = binom.cdf(count_w12, count_w1, p1)
        pbinom4 = binom.cdf(count_w2 - count_w12, N - count_w1, p2)
        # Log likelihood ratio
        LL_i = np.log((pbinom1 * pbinom2) / (pbinom3 * pbinom4))
        Prob[n_gram] = {"LL": LL_i, "p1": p1, "p2": p2, "p": p}
    return Prob
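# Hypothetical toy usage of logLikelihoodNgrams; the words and counts below
# are invented for illustration only.
BOW_unique = {"new": 20, "york": 12}
BOW_grams = {"new york": 10}
print(logLikelihoodNgrams(BOW_unique, BOW_grams, N=1000))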
def test_binomial_p(): """Binomial Test.""" np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'greater'), 1 - binom.cdf(4, 10, 0.5)) np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'less'), binom.cdf(5, 10, 0.5)) np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'two-sided'), 1)
def naive_call(self, player_id): r = self.sim_rollout[player_id][0] + self.sim_rollout[player_id] r[0] = self.sim_rollout[player_id][0] N = sum(self.dice) other_dice = N - self.dice[player_id] if self.last_bid[0] > r[self.last_bid[1]] + other_dice: return [0] p_call_liar = binom.cdf(self.last_bid[0] - r[self.last_bid[1]] - 1, other_dice, 1 / 6 + (self.last_bid[1] != 0) / 6) odds = np.zeros((1 + N, 6)) lower_lim = get_legit_bids(self.last_bid) for i in range(6): p = 1 / 6 + (i != 0) / 6 upper = int(binom.isf(0.15, other_dice, p)) + r[i] lower = lower_lim[i] odds[lower:upper + 1, i] = ( 1 - binom.cdf(np.arange(-r[i] - 1, -r[i] + N), other_dice, p) * binom.cdf(np.arange(-1, N), N, p))[lower:upper + 1] if p_call_liar > 0.7 or np.random.sample() < p_call_liar / ( p_call_liar + np.sum(odds)): return [0] else: odds = (odds**3).flatten() odds /= np.sum(odds) #print('odd',odds) index = np.random.choice(np.arange(len(odds)), p=odds) return [index // 6, index % 6]
def transition_prob_naive(result, rollout, pre_bid, bid, call_belief): r = rollout[0] + rollout r[0] = rollout[0] N = len(result) - 1 #num of total dice other_dice = N - sum(rollout) odds = np.zeros((1 + N, 6)) if pre_bid is not None: lower_lim = get_legit_bids(pre_bid) for i in range(6): p = 1 / 6 + (i != 0) / 6 upper = int(binom.isf(0.15, other_dice, p)) + r[i] if pre_bid is None: lower = max(1, int(binom.isf(0.85, other_dice, p)) + r[i]) else: lower = max(1, lower_lim[i]) odds[lower:upper + 1, i] = (1 - binom.cdf(np.arange(-r[i] - 1, -r[i] + N), other_dice, p) * binom.cdf(np.arange(-1, N), N, p))[lower:upper + 1] odds = odds**(3 + int(9 / other_dice**2)) # if sum(rollout)==1: # print(rollout,pre_bid,bid) # print(odds) if odds[bid[0], bid[1]] == 0: return 0 return odds[bid[0], bid[1]] / np.sum(odds)
def generate_mapping(lines, answers, combination): answers = np.array(answers) p = len(answers[answers]) / float(len(answers)) #print "True questions fraction: {0}".format(p) letter_freq = su.calc_freq_over_cols(lines, combination) correlation_list = su.calc_correlations(lines, answers, combination) sorted_corr_indices = np.argsort(correlation_list)[::-1] cur_freq = 0.0 added_to_positive = 0 k = len(combination) halfK = k / 2.0 cur_proba = 1.0 - binom.cdf(halfK, k, cur_freq) while cur_proba < p: cur_freq += letter_freq[su.alphabet[ sorted_corr_indices[added_to_positive]]] added_to_positive += 1 cur_proba = 1.0 - binom.cdf(halfK, k, cur_freq) mapping = {} for i in range(added_to_positive): mapping[su.alphabet[sorted_corr_indices[i]]] = 1.0 for i in range(added_to_positive, len(su.alphabet)): mapping[su.alphabet[sorted_corr_indices[i]]] = 0.0 return mapping
def Warranty(self, sales_volume, cost_warranty): self.sales_volume = sales_volume self.cost_warranty = cost_warranty print('Compute warranty cost for ' + self.name + ' RDT') failureprob = sum( binom.cdf(k=self.c, n=self.n, p=self.pi) * self.pi) / sum( binom.cdf(k=self.c, n=self.n, p=self.pi)) return self.cost_warranty * failureprob * self.sales_volume, failureprob
def _team_independent_start_pct(self, week): def projected_leaders(positions, league=self.league): return sorted(league.player_universe.values(), key=lambda player: player.weekly_points(week) if player.weekly_points(week) and player.position in positions else 0.0, reverse=True) week_injury_rate = self.league.injury_simulations[self.position][week - 1] weekly_projected_leaders = projected_leaders([self.position]) player_rank = weekly_projected_leaders.index(self) total_starting_players = (self.league.roster_settings[self.position] * self.league.roster_settings['teams']) non_flex_pct = 1 if player_rank < total_starting_players else ( 1 - binom.cdf( # Probability that few enough players ahead get hurt to prevent # player from starting player_rank - total_starting_players, player_rank, week_injury_rate)) if (self.position not in self.league.roster_settings['flex_positions'] or non_flex_pct == 1 or self.league.roster_settings['flex'] == 0): self.independent_start_pcts[week] = non_flex_pct return non_flex_pct else: weekly_projected_leaders = projected_leaders( self.league.roster_settings['flex_positions']) player_rank = weekly_projected_leaders.index(self) total_flex_eligible_starters = (sum([ self.league.roster_settings[pos] for pos in self.league.roster_settings['flex_positions'] ]) * self.league.roster_settings['teams']) injury_rates = [] injury_rate_weights = [] for pos in self.league.roster_settings['flex_positions']: injury_rates.append(self.league.injury_simulations[pos][week - 1]) injury_rate_weights.append(self.league.roster_settings[pos]) flex_pct = 1 if player_rank < total_flex_eligible_starters else ( 1 - binom.cdf( # Probability that few enough players ahead get hurt to # prevent player from starting player_rank - total_flex_eligible_starters, player_rank, np.average(injury_rates, weights=injury_rate_weights))) start_pct = (1 - non_flex_pct) * flex_pct + non_flex_pct self.independent_start_pcts[week] = start_pct return start_pct
def confidence_variation(times, quantile, confidence_interval):
    """
    Calculate the confidence interval

    :param times: The list of times for the calculation
    :param quantile: the quantile we are going to use
    :param confidence_interval: The confidence interval
    :return: confidence_variation, lower confidence, upper confidence
    """
    assert isinstance(times, list)
    assert 0 < quantile < 1, "Quantile value is " + str(
        quantile) + " which should be between 0 and 1"
    assert 0.5 < confidence_interval < 1, "Desired confidence interval should be between 0.5 and 1"
    sorted_times = sorted(remove_outliers(times))
    q = mquantiles(sorted_times, quantile)[0]
    n = len(sorted_times)
    # This should not happen, just for debugging purposes
    if not n:
        print(times)
    confidence = 0
    middle = round(quantile * (n + 1))
    ui = middle
    li = middle
    while confidence < confidence_interval:
        if ui < n - 1:
            ui = ui + 1
        if li > 0:
            li = li - 1
        confidence = binom.cdf(ui - 1, n, quantile) - binom.cdf(
            li - 1, n, quantile)
        if ui >= n - 1 and li <= 0:
            break
    if ui >= n - 1:
        ui = n - 1
    if li <= 0 or li > ui:
        li = 0
    try:
        lower_range = sorted_times[li]
        upper_range = sorted_times[ui]
    except IndexError:
        # This should not happen. Just for debugging purposes
        print("Lower range", li)
        print("Upper range", ui)
        print("List length", len(sorted_times))
        sys.exit(1)
    confidence_range = upper_range - lower_range
    return (confidence_range / q) * 100, lower_range, upper_range
def get_batch_artefacts(self, gp):
    # using binom, and gnomad_af as p, to produce probability to help identify
    # batch specific artefacts
    # lower_bound is there to remove cohorts where there is just one patient
    # zero_gnomad_c_cutoff allows max internal count when gnomad_af is 0
    dt_d = defaultdict(Counter)
    dt_r = defaultdict(Counter)
    cohorts = Counter()
    for k, v in gp['patients'].items():
        cohorts[self.patient_mini[k]['contact']] += 1
        vc = Counter(v['variants'])
        for i in vc:
            dt_d[self.patient_mini[k]['contact']][i] += 1
            if vc[i] > 1:
                dt_r[self.patient_mini[k]['contact']][i] += 1
    # remove cohorts with count lower than lower_bound
    # (iterate over a list copy: deleting from a dict while iterating its
    # view raises a RuntimeError on Python 3)
    for k in list(cohorts.keys()):
        if cohorts[k] < self.lower_bound:
            del cohorts[k]
            del dt_d[k]
            if k in dt_r:
                del dt_r[k]
    # for heterozygous variants
    result_d = defaultdict(list)
    for k1, v1 in dt_d.items():
        n_variants = len(v1)
        for k2, v2 in v1.items():
            if not gp['variants'][k2]['gnomad_af']:
                if v2 > self.zero_gnomad_c_cutoff:
                    result_d[k1].append(k2)
                continue
            prob = 1 - binom.cdf(v2 - 1, cohorts[k1],
                                 gp['variants'][k2]['gnomad_af'])
            if prob < self.binom_cutoff / n_variants:
                result_d[k1].append(k2)
    for k in result_d:
        result_d[k] = set(result_d[k])
    # for homozygous variants
    result_r = defaultdict(list)
    for k1, v1 in dt_r.items():
        n_variants = len(v1)
        for k2, v2 in v1.items():
            if not gp['variants'][k2]['gnomad_hom_af']:
                if v2 > self.zero_gnomad_c_cutoff:
                    result_r[k1].append(k2)
                continue
            prob = 1 - binom.cdf(v2 - 1, cohorts[k1],
                                 gp['variants'][k2]['gnomad_hom_af'])
            if prob < self.binom_cutoff / n_variants:
                result_r[k1].append(k2)
    for k in result_r:
        result_r[k] = set(result_r[k])
    return {'d': result_d, 'r': result_r}
def CRR_formula_put(S0, K, T, u, d, r):
    """
    CRR_formula_put(S0, K, T, u, d, r) = initial price of a put option
    using the CRR formula

    CRR formula = (K/R^T)*phi(A;T,q) - S_0*phi(A;T,q_dash)
    where:
    phi(_;_,_) is a binomial distribution function
    A = largest number of upmoves for which the put ends in the money
    q_dash = q*(U/R)
    q = risk neutral probability
    S0 = initial asset price
    K = strike price
    T = expiry time
    u = up
    d = down
    r = fixed interest rate
    """
    # import modules
    import math
    import numpy as np
    from scipy.stats import binom

    # check input parameters
    if S0 <= 0.0 or d <= -1 or d >= u or r <= -1:
        print("Invalid input arguments")
        print("Terminating program")
        return 1

    # check for arbitrage
    if not (d < r < u):
        print("Model contains arbitrage")
        print("Terminating program")
        return 1

    # crr variables
    U = 1 + u
    D = 1 + d
    R = 1 + r

    # calculating terms: q, q_dash, A
    q = (R - D) / (U - D)
    q_dash = ((R - D) / (U - D)) * (U / R)
    A = math.ceil((np.log(K / (S0 * D**T)) / np.log(U / D))) - 1

    # price of the put option
    price = (K / (R**T)) * binom.cdf(A, T, q) - \
        S0 * binom.cdf(A, T, q_dash)

    return price
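# Cross-check sketch for CRR_formula_put above: for a small tree the
# closed-form price should match the discounted risk-neutral expectation
# E[(K - S_T)^+] / R^T summed over terminal nodes. All numbers here are
# illustrative assumptions.
from scipy.stats import binom

S0, K, T, u, d, r = 100.0, 100.0, 5, 0.1, -0.05, 0.02
U, D, R = 1 + u, 1 + d, 1 + r
q = (R - D) / (U - D)
payoff = sum(binom.pmf(k, T, q) * max(K - S0 * U**k * D**(T - k), 0.0)
             for k in range(T + 1))
print(payoff / R**T, CRR_formula_put(S0, K, T, u, d, r))  # should agree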
def test_row(row): af1 = row['af1'] depth1 = row['depth1'] depth2 = row['depth2'] p = af1 if p == 1.0: p = 0.99 pval1 = binom.cdf(0, depth1, p) pval2 = binom.cdf(0, depth2, p) return pval1 < 0.05 and pval2 < 0.05
def hypergeom_conf_interval(n, x, N, cl=0.975, alternative="two-sided", G=None,
                            **kwargs):
    """
    Compute a confidence interval for a hypergeometric G, the number of good
    objects in the population.

    Parameters
    ----------
    n : int
        The number of objects in the sample (draws without replacement).
    x : int
        The number of "good" objects in the sample.
    N : int
        The number of objects in the population.
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    G : int in [0, N]
        Starting point in search for confidence bounds for hypergeometric G.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (at least) 1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    import math
    from scipy.optimize import brentq
    from scipy.stats import hypergeom
    assert alternative in ("two-sided", "lower", "upper")

    if G is None:
        G = (x / n) * N
    ci_low = 0
    ci_upp = N

    if alternative == 'two-sided':
        cl = 1 - (1 - cl) / 2

    # The original body was pasted from the binomial version (it called
    # binom.cdf and referenced an undefined p). The bounds below invert the
    # hypergeometric tail in G instead, relying on scipy's hypergeom
    # accepting a continuous "good count" during the root search, with the
    # roots rounded outward to integers afterwards.
    if alternative != "upper" and x > 0:
        f = lambda q: cl - hypergeom.cdf(x - 1, N, q, n)
        ci_low = math.ceil(brentq(f, 0.0, G, **kwargs))
    if alternative != "lower" and x < n:
        f = lambda q: hypergeom.cdf(x, N, q, n) - (1 - cl)
        ci_upp = math.floor(brentq(f, G, N, **kwargs))

    return ci_low, ci_upp
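# Hedged usage of hypergeom_conf_interval above: two-sided 95% bounds on the
# number of good objects G in a population of N = 100 after observing x = 12
# good items in a sample of n = 30 (illustrative numbers).
print(hypergeom_conf_interval(30, 12, 100, cl=0.95))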
def CRR_formula_strap(S0, K, T, u, d, r):
    """
    CRR_formula_strap(S0, K, T, u, d, r) = initial price of a strap option
    using CRR formula

    CRR formula = 2 * pi_c + pi_p
    where:
    pi_c = price of a call option
    pi_p = price of a put option
    q = risk neutral probability
    S0 = initial asset price
    K = strike price
    T = expiry time
    u = up
    d = down
    r = fixed interest rate
    """
    # check input parameters
    if S0 <= 0.0 or d <= -1 or d >= u or r <= -1:
        print("Invalid input arguments")
        print("Terminating program")
        return 1

    # check for arbitrage
    if not (d < r < u):
        print("Model contains arbitrage")
        print("Terminating program")
        return 1

    # import modules
    import math
    import numpy as np
    from scipy.stats import binom

    # crr variables
    U = 1 + u
    D = 1 + d
    R = 1 + r

    # calculating terms: q, q_dash, A
    q = (R - D) / (U - D)
    q_dash = ((R - D) / (U - D)) * (U / R)
    A = math.floor((np.log(K / (S0 * D**T)) / np.log(U / D)))

    # prices of the put and call legs
    pi_p = (K / (R**T)) * binom.cdf(A, T, q) - \
        S0 * binom.cdf(A, T, q_dash)
    pi_c = S0 * (1 - binom.cdf(A, T, q_dash)) - \
        (K / (R**T)) * (1 - binom.cdf(A, T, q))

    return pi_p + 2 * pi_c
def updateFOS(path, db, motifChrom="chr17", dgfCutoff=20, method="Binom"):
    """calculate fos from discontinuous variableStep wiggle files
    with two method options: NSD/Binomial test"""
    mcollection = db["hg19" + motifChrom]
    print('updating fos', motifChrom)
    for infile in glob.glob(os.path.join(path, "wgEncodeUwDgf*_cut" + motifChrom)):
        wigfilename = infile.split(motifChrom)[0]
        expName = "fos"
        ctName = wigfilename.split('EncodeUwDgf')[-1].split('Aln')[0]
        wigFile = open(infile, 'rt')
        bwFile = wigfilename + motifChrom + '.bw'
        if not os.path.isfile(bwFile):
            countWig.compressVarWig(wigFile, expName, wigfilename)
        coordDict, valuesDict = countWig.getBinVarCoord(bwFile, ctName)
        arrayDict = defaultdict(list)
        cursor = mcollection.find(
            {"tf_name": {"$in": ["IRF3", "MAFK", "NFYA", "SIN3A", "ZNF384"]}})
        for test in cursor:
            if motifChrom not in arrayDict:
                arrayDict[motifChrom] = countWig.buildVarHist(
                    motifChrom, coordDict, valuesDict, ctName)
            xs, xvals, sums = arrayDict[motifChrom]
            motifStart = test["genomic_region"]["start"]
            motifEnd = test["genomic_region"]["end"]
            flankWin = round((motifEnd - motifStart + 1) * 1.75)
            flankL = max(0, motifStart - flankWin)
            flankR = motifEnd + flankWin
            countTotL = countWig.queryHist(xs, xvals, sums, flankL, motifEnd,
                                           varWindow=True)[2]
            countTotR = countWig.queryHist(xs, xvals, sums, motifStart, flankR,
                                           varWindow=True)[2]
            countCent = countWig.queryHist(xs, xvals, sums, motifStart, motifEnd)[2]
            count = countWig.queryHist(xs, xvals, sums, motifStart - 100,
                                       motifEnd + 100, varWindow=True)[2]
            if method == "NSD":
                try:
                    fos = np.sqrt((count - countCent) / count) - \
                        np.sqrt(countCent / count)
                except ZeroDivisionError:
                    fos = 0
            elif method == "Binom":
                # probability of seeing more than countCent cuts in the motif
                # under a uniform-cut null over each flanking window; large
                # when the motif is depleted of cuts
                try:
                    fos = min(1 - binom.cdf(countCent, countTotL,
                                            float(motifEnd - motifStart) / (motifEnd - flankL)),
                              1 - binom.cdf(countCent, countTotR,
                                            float(motifEnd - motifStart) / (flankR - motifStart)))
                except ZeroDivisionError:
                    fos = 0
            if fos > 0.95 and count - countCent > dgfCutoff:
                mcollection.update({"_id": test["_id"]},
                                   {"$set": {"dgf.fos": fos}}, upsert=True)
    return 0
def calculate_p_tie(row, n=12): pa = row[6] pb = row[7] a_wins = sum( binom.pmf(xa + 1, n, pa) * binom.cdf(xa, n, pb) for xa in range(n) ) b_wins = sum( binom.pmf(xb + 1, n, pb) * binom.cdf(xb, n, pa) for xb in range(n) ) return 1/(1 - a_wins - b_wins)
def BinModEUPut(S0, r, sigma, T, K, M): dt = T / M b = 0.5 * (np.exp(-r * dt) + np.exp((r + sigma**2) * dt)) u = b + np.sqrt(b**2 - 1) d = 1 / u q = (np.exp(r * dt) - d) / (u - d) a = np.ceil((np.log(K / S0) - M * np.log(d)) / (np.log(u / d))) qtilde = q * u / np.exp(r * dt) V0 = K * np.exp(-r * T) * binom.cdf(a - 1, M, q) - S0 * binom.cdf( a - 1, M, qtilde) return V0
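# Convergence sketch for BinModEUPut above: as the number of steps M grows,
# the binomial price should approach the Black-Scholes put value. The market
# parameters are illustrative assumptions.
import numpy as np
from scipy.stats import norm

S0, r, sigma, T, K = 100.0, 0.05, 0.2, 1.0, 110.0
d1 = (np.log(S0 / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
d2 = d1 - sigma * np.sqrt(T)
bs_put = K * np.exp(-r * T) * norm.cdf(-d2) - S0 * norm.cdf(-d1)
for M in (10, 100, 1000):
    print(M, BinModEUPut(S0, r, sigma, T, K, M), bs_put)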
def binom_conf_interval(n, x, cl=0.975, alternative="two-sided", p=None,
                        **kwargs):
    """
    Compute a confidence interval for a binomial p, the probability of
    success in each trial.

    Parameters
    ----------
    n : int
        The number of Bernoulli trials.
    x : int
        The number of successes.
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    p : float in (0, 1)
        Starting point in search for confidence bounds for probability of
        success in each trial.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (approximately)
        1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    assert alternative in ("two-sided", "lower", "upper")

    if p is None:
        p = x / n
    ci_low = 0.0
    ci_upp = 1.0

    if alternative == 'two-sided':
        cl = 1 - (1 - cl) / 2

    if alternative != "upper" and x > 0:
        f = lambda q: cl - binom.cdf(x - 1, n, q)
        # pass kwargs through as keyword arguments (xtol, rtol, maxiter)
        ci_low = brentq(f, 0.0, p, **kwargs)
    if alternative != "lower" and x < n:
        f = lambda q: binom.cdf(x, n, q) - (1 - cl)
        # bracket must run from the starting point up to 1
        ci_upp = brentq(f, p, 1.0, **kwargs)

    return ci_low, ci_upp
def gammatest(comp, data, weights, num): # set up vector for quicker search # (select first sample) # a large number of times # select random sample # calculate normalized distance from last sample # sort distances # express expected gamma parameters # find the distance at which deviation from expected distr is largest # compute its significane level data = data[comp.feet] dim = len(comp.feet) acc = np.cumsum(weights) dists = [False]*num oldind = selectind(acc) ind = oldind wsum = 0 for i in range(num): while ind == oldind: ind = selectind(acc) dist = sqvec(np.matmul(comp.ilower, np.subtract(data.iloc[ind], data.iloc[oldind]))) w = weights[oldind] # Because weights[ind] is already accounted for in selection dists[i] = (dist, w) wsum += w oldind = ind dists.sort(key = lambda pr:pr[0]) mnval = 0 mnind = 0 w = 0 for i in range(num): (d, w0) = dists[i] w += w0 p = gammainc(dim/2, d/4) # print((wsum, w, p)) val = log(wsum*p/w)*w + log(wsum*(1-p)/(wsum-w))*(wsum-w) if p > 0 and p < 1 and w > 0 and w < wsum else 0 dists[i] = (d, w, p, p*wsum - w, val) if val<mnval and p<0.90 and p>0.05 and p*wsum > w: mnval = val mnind = i (d, w,z1,z2,z3) = dists[mnind] p = gammainc(dim/2, d/4) if mnval == 0: sig = 1.0 elif w > wsum*p: n = ceil(w) nn = ceil(wsum) sig = binom.cdf((nn-n), nn, (1-p)) else: n = floor(w) nn = ceil(wsum) sig = binom.cdf(n, nn, p) return (sig, dists)
def evaluate_policy(new_policy, env, sample): env.time_step(new_policy) X_I, X_S = sample.X_I, sample.X_S currentV_I = sample.val_I currentV_L = sample.val_L meanX_S, meanX_I, meanX_R = env.sample_stochastic() errX_S, errX_I, errX_R = env.get_error() val_I = meanX_I val_L = new_policy lowXS = max(round(meanX_S - errX_S, 0), 0) uppXS = min(round(meanX_S + errX_S, 0), env.M) # lowXI=max(round(meanX_I-errX_I,0),0) # uppXI=min(round(meanX_I+errX_I,0),env.M) lowXR = max(round(meanX_R - errX_R, 0), 0) uppXR = min(round(meanX_R + errX_R, 0), env.M) lowI = max(X_S - uppXS, 0) uppI = min(X_S, X_S - lowXS) lowR = max(lowXR - (env.M - X_I - X_S), 0) uppR = min(X_I, uppXR - (env.M - X_I - X_S)) for i in range(lowI, uppI): for j in range(lowR, uppR): probI = poisson.pmf(i, env.beta) probR = binom.pmf(j, uppR, env.gamma) if i == lowI: probI = poisson.cdf(i, env.beta) if i == uppI: probI = 1 - poisson.cdf(i - 1, env.beta) if j == lowR: probR = binom.cdf(j, uppR, env.gamma) if j == uppR: probR = 1 - binom.cdf(j - 1, uppR, env.gamma) val_I += 0.97 * probI * probR * currentV_I[X_I + i - j - 1, X_S - i - 1] val_L += 0.97 * probI * probR * currentV_L[X_I + i - j - 1, X_S - i - 1] objs = [val_I, val_L] return objs
def point_statistics(self, parent_region, test_region, point_region): cur = self.con.cursor() r = dict() basepair_query = 'select basepairs from genomic_region where name=?' cur.execute(basepair_query, [self.canonical_name(parent_region)]) r['basepairs_in_parent_region'] = cur.fetchone()[0] cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + test_region)]) r['basepairs_in_test_region'] = cur.fetchone()[0] cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + point_region)]) r['points_in_parent_region'] = cur.fetchone()[0] cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + test_region + '%' + point_region)]) r['points_in_test_region'] = cur.fetchone()[0] if r['basepairs_in_parent_region'] == 0: r['test_region_basepair_ratio'] = 1 else: r['test_region_basepair_ratio'] = r['basepairs_in_test_region'] / \ r['basepairs_in_parent_region'] if r['points_in_parent_region'] == 0: r['test_region_point_count_ratio'] = 1 r['enrichment_pvalue'] = 1 r['depletion_pvalue'] = 1 r['log2_enrichment_ratio'] = 0 r['enrichment_ratio'] = 1.0 else: r['test_region_point_count_ratio'] = r['points_in_test_region'] / r['points_in_parent_region'] r['depletion_pvalue'] = binom.cdf(k=r['points_in_test_region'], n=r['points_in_parent_region'], p=r['test_region_basepair_ratio']) r['enrichment_pvalue'] = 1 - \ binom.cdf(k=r['points_in_test_region'] - 1, n=r['points_in_parent_region'], p=r['test_region_basepair_ratio']) r['enrichment_ratio'] = r['test_region_point_count_ratio'] /\ r['test_region_basepair_ratio'] if r['points_in_test_region'] == 0: r['log2_enrichment_ratio'] = float('-inf') else: r['log2_enrichment_ratio'] = math.log2(r['enrichment_ratio']) return r
def main(n, p_0, alpha=1 / 10, outdir='out'): testScipy(n, p_0, outdir) k_cutoff = binom.ppf(alpha, n, p_0) - 1 print('k*: {}'.format(k_cutoff)) print('P(binom < k*): {}'.format(binom.cdf(k_cutoff, n, p_0))) fig, ax = plt.subplots(1, 1) x = list(range(0, n + 1)) ax.plot(x, binom.cdf(x, n, p_0), 'ro', ms=5) ax.plot([0, n], [alpha, alpha], 'g') ax.plot([k_cutoff, k_cutoff], [0, 1], 'b') # TODO alpha and k* plt.savefig(outdir + "/{}_{}.png".format(n, p_0))
def probability_above(fun_resample, gamma, max_samp=None, comm=MPI.COMM_SELF, batch=5, tol=0, bound_significance=0.01, print_per_batch=False, exception_at_max_samp=False, printing=True): ''' Returns True if P(fun_resample()) is significantly above gamma, returns False if P(fun_resample()) is significantly below gamma. Increases samples size until significance is obtained. (null hypothesis is p = gamma). ''' vals = np.zeros((0,)) s = "gamma = {}".format(gamma) while True: vals_new_samp = bootstrap(fun_resample, batch, comm=comm, dtype=np.bool_) #if True:#gamma == 0.05: # print_all_ranks(comm, str(vals_new_samp)) #vals_new_samp = vals_new_samp[~np.isnan(vals_new_samp)] vals = np.hstack([vals, vals_new_samp.astype(np.float_)]) upper_bound_pval = binom.cdf(np.sum(vals), len(vals), gamma+tol) lower_bound_pval = 1 - binom.cdf(np.sum(vals)-1, len(vals), gamma-tol) s += ("\nnp.mean(vals) = {}".format(np.mean(vals)) + "\nlen(vals) = {}".format(len(vals)) + "\nupper_bound_pval = {}".format(upper_bound_pval) + "\nlower_bound_pval = {}".format(lower_bound_pval)) if upper_bound_pval <= bound_significance: s += '\n---' if printing: print_rank0(comm, s) return False # we have lower bound instead. if lower_bound_pval <= bound_significance: s += "\n---" if printing: print_rank0(comm, s) return True if not max_samp is None: if len(vals) > max_samp: if exception_at_max_samp: raise MaxSampExceededException s += "\n---"+"\n"+"max_samp reached" print_rank0(comm, s) lower_bound = np.random.rand(1) < 0.5 lower_bound = comm.bcast(lower_bound) if lower_bound: # 50% chance to be above or below return True return False batch = len(vals) if print_per_batch: if printing: print_rank0(comm, s) s = "gamma = {}".format(gamma)
def calculateProbabilities(counts, window_size, length, start=0):
    '''Calculates the probability of observing the counted number of reads
    in windows of "window_size" around each cross-linked site.

    Currently assumes that coordinates are in transcript space.

    The length of the transcript must be provided because it is needed for
    calculating the prior and may be outside of the provided coordinates.

    Start allows, together with length, for only using part of counts.'''

    # limit to subset
    counts = counts[(counts.index.values >= start) &
                    (counts.index.values < (start + length))]
    total_counts = counts.sum()

    # probability is counts-2 because we want P(X>=x) which is
    # 1 - P(X<x-1). That's -1. The other -1 comes from the fact
    # that we want the p that any base in the transcript has
    # X>=x, not just this specific one.
    single_base_ps = 1 - binom.cdf(counts - 2, total_counts, 1.0 / length)

    heights = np.zeros(len(counts))
    window_start = np.maximum(0, counts.index.values - window_size)
    window_end = np.minimum(start + length, counts.index.values + window_size)
    ps = (window_end - window_start + 0.0) / length
    ps = ps.astype("float64")

    for i, base in enumerate(counts.index.values):
        try:
            window = counts[window_start[i]:window_end[i]]
        except KeyError:
            print(window_start, window_end)
            print(counts)
        heights[i] = window.sum()

    heights = heights - counts.values
    window_ps = pd.Series(1 - binom.cdf(heights - 1, total_counts, ps),
                          index=counts.index)

    # correct for number of independent windows.
    return window_ps * single_base_ps
def plotBinomCDF(): # this is just a very useless function, not adjustable, has to be refactored tau_p = 80 k = 100 p = 0.25 fig, ax = plt.subplots(1, 1) print(binom.cdf(tau_p, k, 0.9)) print(1 - binom.cdf(tau_p - 1, k, p)) x = np.arange(0, k) ax.plot(x, binom.cdf(x, k, p), 'bo', ms=5, label='binom cdf') ax.vlines(x, 0, binom.cdf(x, k, p), colors='b', lw=5, alpha=0.5) plt.show()
def equivalence(self, x, n, pd0=0, conf_level=0.95): """ Equivalence one-tailed test pc >= pc0 H0: pd >= pd0 d' >= d'0 pc < pc0 H1: pd < pd0 d' < d'0 """ alpha = 1 - conf_level pg = self.method.guessing pc = x / n pc0 = pg + (1 - pg) * pd0 pd = (pc - pg) / (1 - pg) dprime = fsolve(lambda d: self.method.psychfunc(d) - pc, 1.0)[0] p_value = binom.cdf(x, n, pc0) xcrit = binom.ppf(alpha, n, pc0) + 1 power = binom.cdf(xcrit, n, pc) pc_err = np.sqrt(pc * (1 - pc) / n) pd_err = pc_err / (1 - pg) der = derivative(self.method.psychfunc, dprime, dx=1e-6) dprime_err = pc_err / der # Lower limits pc_lower = max(beta.ppf(alpha / 2, x, n - x + 1), pg) pd_lower = (pc_lower - pg) / (1 - pg) dprime_lower = fsolve(lambda d: self.method.psychfunc(d) - pc_lower, 1.0)[0] # Upper limits pc_upper = min(beta.ppf(1 - alpha / 2, x + 1, n - x), 1.0) pd_upper = (pc_upper - pg) / (1 - pg) dprime_upper = fsolve(lambda d: self.method.psychfunc(d) - pc_upper, 1.0)[0] results = TestResults( pg, Statistic(pc, pc_err, pc_lower, pc_upper), Statistic(pd, pd_err, pd_lower, pd_upper), Statistic(dprime, dprime_err, dprime_lower, dprime_upper), p_value, alpha, power) return results
def binomial_ci(mle, N, alpha=0.05): """ One sided confidence interval for a binomial test. To find the two sided interval, call with (1-alpha/2) and alpha/2 as arguments Parameters ---------- mle : float Fraction of successes N : int Number of trials If after N trials we obtain mle as the proportion of those trials that resulted in success, find c such that P(k/N < mle; theta = c) = alpha where k/N is the proportion of successes in the set of trials, and theta is the success probability for each trial. """ from scipy.stats import binom from scipy.optimize import bisect to_minimise = lambda c: binom.cdf(mle*N,N,c)-alpha return bisect(to_minimise,0,1)
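# Per the docstring of binomial_ci above, a two-sided 95% interval comes from
# two one-sided calls (illustrative values):
upper = binomial_ci(0.3, 100, alpha=0.025)  # c with P(X <= 30; c) = 0.025
lower = binomial_ci(0.3, 100, alpha=0.975)  # c with P(X <= 30; c) = 0.975
print(lower, upper)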
def triplet_prob(Nw, t, ptriplet, psingle):
    weights = np.zeros(Nw + 1)
    # number of non-singles must be greater than or equal to t
    for ns in range(t, Nw + 1):
        weights[ns] = 1.0 - binom.cdf(t - 1, ns, ptriplet)
    return sum(weights * binom.pmf(np.linspace(0, Nw, Nw + 1), Nw,
                                   (1.0 - psingle)))
def check(N, p):
    global numfails, numchecks, mu, sigma2
    H = NeuronGroup(1, 'v:1', threshold='False', name='H')
    G = NeuronGroup(N, 'v:1', threshold='False', name='G')
    S = Synapses(H, G, on_pre='v+=w', name='S')
    S.connect(p=p)
    m = len(S)
    low, high = binom.interval(alpha, N, p)
    if p == 0:
        low = high = 0
    elif p == 1:
        low = high = N
    else:
        i = diff(S.j[:])
        i = i[i < isi_max[p]]
        b = bincount(i, minlength=isi_max[p])[:isi_max[p]]
        if b[0]:
            print('Major error: repeated indices for N=%d, p=%.3f' % (N, p))
            raise ValueError("Repeated indices")
        isi[p] += b
        num_isi[p] += sum(b)
    q = binom.cdf(low - 0.1, N, p) + binom.sf(high + 0.1, N, p)
    mu += q
    sigma2 += q * (1 - q)
    numchecks += 1
    if m < low or m > high:
        numfails += 1
        return True
    else:
        return False
def value_picks(picks): points = get_points(picks,data) num_picks = len(picks) avg_picks = sum(points) / num_picks p = {str(n+1) + ' Star': 1 - binom.cdf(n, num_picks, avg_picks) for n in range(5)} return p
def clean_value_picks(self): # return this in a clean format points = get_points(self.picks) num_picks = self.quantity avg_picks = sum(points) / num_picks p = {str(n+1) + ' Star': str(int(100*(1 - binom.cdf(n, num_picks, avg_picks)))) + '%' for n in range(5)} return p
def clean_value_picks(picks): points = get_points(picks,data) num_picks = len(picks) avg_picks = sum(points) / num_picks p = {str(n+1) + ' Star': str(int(100*(1 - binom.cdf(n, num_picks, avg_picks)))) + '%' for n in range(5)} return p
def solve(problem): N, X, Y = problem if N == 0: return 0.0 i = find_int_by_bisection(s, 1, N, N) if is_triangle_diamond(X, Y, i): return 1.0 # case A if not is_triangle_diamond(X, Y, i + 1): return 0.0 # case C # case B if X == 0: return 0.0 # B(1) m = s(i + 1) - N - 1 if m < b(i + 1): return binom.cdf(abs(X) - 1, m, 0.5) # B(2) return 1.0 - binom.cdf(b(i + 1) - abs(X) - 1, N - s(i), 0.5) # B(3)
def fraction_disc(results,N,overlap,fig,alpha=None,multiple_correction=False,n_replicates=None): """ Given test results, a number of components N, a level of overlap between odorants, a reference figure panel ('a' or 'b'), an optional choice of significance threshold alpha, whether or not to do multiple comparisons correction (false discovery rate method), and an optional new number of replicates (subjects or tests), returns an array containing either the fraction of correct responses (if alpha is None) or whether or not that fraction is significantly above chance (if alpha is a number). This function assists with generating variants of Figs. 2B, 2C, 3A, and 3B in Bushdid et al. """ assert fig in ['a','b'] correct,_,_ = correct_matrix(results,N,overlap) if fig == 'a': dim = 1 elif fig == 'b': dim = 0 fract_correct = np.mean(correct,dim) if alpha is not None: if not n_replicates: n_replicates = correct.shape[dim] # n_subjects or n_tests. ps = 1.0 - binom.cdf(fract_correct*n_replicates,n_replicates,1.0/3) if multiple_correction == 'bonferroni': alpha = alpha/len(ps) if multiple_correction == 'fdr': ps = np.array([p*len(ps)/(k+1) for k,p in enumerate(sorted(ps))]) fract_sig = ps < alpha/2 return fract_sig else: return fract_correct
def __init__(self, Year, pvalue=0.01):
    A = Year.Adj
    v = float(A.sum())
    n, m = A.shape
    self.sets = (n, m)
    alpha = pvalue / float(n * m)  # Bonferroni-corrected significance level
    in_degree = A.sum(0)
    out_degree = A.sum(1)
    i, j, aij = extract.find(A)
    nonzero = len(i)
    pij = np.zeros((nonzero,))
    for h in range(nonzero):
        pij[h] = out_degree[i[h]] * in_degree[0, j[h]] / v**2
    # p-value of each observed edge weight under the binomial null model
    P = 1 - binom.cdf(aij - 1, v, pij)
    data = 1. * (P <= alpha)
    zero_entries = np.where(data == 0)
    data = np.delete(data, zero_entries)
    i = np.delete(i, zero_entries)
    j = np.delete(j, zero_entries)
    aij = np.delete(aij, zero_entries)
    ij = np.asarray(list(zip(i, j))).T
    self.svnet = csc_matrix((data, ij))
    self.Adj = csc_matrix((aij, ij))
    self.filename = Year.filename
    self.edgetype = Year.edgetype
    self.banks = Year.banks
    self.firms = Year.firms
    self.descr = 'valid network'
def pbinom(x, size=1, prob=0.5, lowertail=True, log=False): """ ============================================================================ pbinom() ============================================================================ The cumulative distribution function for the binomial distribution. You provide a value along the binomial distribution (eg x=3) or array of values, and it returns what proportion of values lie below it (the quantile) Alternatively, if you select lowertail=False, it returns the proportion of values that are above it. USAGE: dbinom(x, size, prob=0.5, log=False) pbinom(x, size, prob=0.5, lowertail=True, log=False) qbinom(q, size, prob=0.5, lowertail=True) rbinom(n=1, size=1, prob=0.5) :param x: int. or array of ints. The values along the distribution. :param size: int. Number of trials :param prob: float. Probability of a success :param lowertail bool. are you interested in what proportion of values lie beneath x? :param log: bool. take the log? :return: an array of quantiles() corresponding to the values in x ============================================================================ """ if lowertail and not log: return binom.cdf(x, n=size, p=prob) elif not lowertail and not log: return binom.sf(x, n=size, p=prob) elif lowertail and log: return binom.logcdf(x, n=size, p=prob) else: return binom.logsf(x, n=size, p=prob)
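# Example mirroring R's pbinom (values verified for Binomial(10, 0.5)):
print(pbinom(3, size=10, prob=0.5))                   # P(X <= 3) = 0.171875
print(pbinom(3, size=10, prob=0.5, lowertail=False))  # P(X > 3)  = 0.828125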
def __init__(self, Year, pvalue=0.01):
    A = Year.Adj
    n, m = A.shape
    alpha = pvalue / n / m  # Bonferroni-corrected significance level
    in_degree = A.sum(0)
    out_degree = A.sum(1)
    i, j, wij = extract.find(A)
    indices = np.where(wij > 0)
    eps = max(wij[indices].min(), 0.1)
    v = A.sum() / eps
    nonzero = len(i)
    pij = np.zeros((nonzero,))
    for h in range(nonzero):
        pij[h] = out_degree[i[h]] * in_degree[0, j[h]] / v**2
    # p-value of each observed edge weight under the binomial null model
    P = 1 - binom.cdf(wij - 1, v, pij)
    data = P <= alpha
    zero_entries = np.where(data == 0)
    data = np.delete(data, zero_entries)
    i = np.delete(i, zero_entries)
    j = np.delete(j, zero_entries)
    wij = np.delete(wij, zero_entries)
    ij = np.asarray(list(zip(i, j))).T
    self.svnet = csc_matrix((data, ij), shape=(n, m))
    self.Adj = csc_matrix((wij, ij), shape=(n, m))
    self.nodes = Year.nodes
    self.filename = Year.filename
    self.edgetype = Year.edgetype
def test_simple():
    nentries = 5
    h = FastSampler(nentries, max_value=100, min_value=1)
    weights = array([1, 1, 3, 5, 2], dtype='d')
    normalized_weights = weights / sum(weights)
    for i in range(nentries):
        h.add(i, weights[i])
    nsamples = 100000
    distro = zeros(nentries)
    for i in range(nsamples):
        idx = h.sample()
        distro[idx] += 1
    normalized_distro = distro / sum(distro)
    print(distro)
    print(weights)
    print(normalized_distro)
    print(normalized_weights)
    # Statistical test on result
    cdf_vals = binom.cdf(k=distro, n=nsamples, p=normalized_weights)
    print("CDF VALS")
    print(cdf_vals)
    has_bad_vals = any(logical_or(cdf_vals <= 0.005, cdf_vals >= 0.995))
    # Will fail about 5% of the time due to statistics
    assert not has_bad_vals
def value_picks(self): # Value picks from binomial cdf points = self.points num_picks = len(points) avg_picks = sum(points) / num_picks p = {str(n+1) + ' Star': 1 - binom.cdf(n, num_picks, avg_picks) for n in range(5)} return p
def optimize_4(N, tx_SNR_range, filepath_down, filepath_up, protocol=4*10**4,
               downtarget=10**(-9), uptarget=10**(-9)):
    """We enforce a Downlink additive noise target and an Uplink additive noise
    target. We do NOT assume the blocklength is evenly divided among all three
    phases. Instead, we allocate the minimum blocklength so that Downlink meets
    its additive noise target. Then we allocate the minimum blocklength so that
    Uplink meets its additive noise target. The remaining blocklength is
    allocated to the XOR phase, which determines the XOR additive noise. The
    optimization module then finds the transmitter SNR and receiver SNR pair so
    that the combination of parameters will allow the protocol to meet its
    overall reliability target.

    Arguments:
        N {int} -- The number of nodes/users total in the control system
        tx_SNR_range {np.arange} -- [description]
        filepath_down {string} -- [description]
        filepath_up {string} -- [description]

    Keyword Arguments:
        protocol {int} -- The length of the entire protocol (all phases
            combined) in bits (default: {40,000})
        downtarget {float: fraction} -- [description] (default: {10**(-9)})
        uptarget {float: fraction} -- [description] (default: {10**(-9)})

    Returns:
        tuple of length 8 --
            0. The SNR experienced at the receiver (post-fade)
            1. The SNR sent at the transmitter (pre-fade) -- this is what we
               care about minimizing
            2. The blocklength of the Downlink Phase in bits
            3. The blocklength of the Uplink Phase in bits
            4. The blocklength of the XOR Phase in bits
            5. The Downlink additive noise achieved at the chosen SNR
            6. The Uplink additive noise achieved at the chosen SNR
            7. The Reed decoding failure probability
    """
    downFile = filepath_down + str(N) + '.in'
    downNode = load_table(downFile)
    upFile = filepath_up + str(N) + '.in'
    upNode = load_table(upFile)
    for nominal_SNR in tx_SNR_range:
        for actual_SNR in np.arange(max(-1, nominal_SNR - 90), nominal_SNR, 0.1):
            downbit, upbit = float("inf"), float("inf")
            for bit in sorted(downNode.tables.keys()):
                bittable = downNode.tables[bit]
                downfunc = interp1d(bittable[0], bittable[1], kind='linear',
                                    bounds_error=False, fill_value=(1.0, 0.0))
                if downfunc(actual_SNR) <= downtarget:
                    downbit = bit
                    break
            for bit in sorted(upNode.tables.keys()):
                bittable = upNode.tables[bit]
                upfunc = interp1d(bittable[0], bittable[1], kind='linear',
                                  bounds_error=False, fill_value=(1.0, 0.0))
                if upfunc(actual_SNR) <= uptarget:
                    upbit = bit
                    break
            xorbit = protocol - downbit - upbit
            if xorbit <= 0:
                continue
            # We calculate reeddrop each time because the rate changes every
            # time (new table)
            blocklength = int(xorbit / 21 / N)
            rate = N * 160 / xorbit * 7 / 4 if xorbit else float("inf")
            if rate > 1:
                continue
            k = (1 - rate) * blocklength
            pbitdrop = Q(np.sqrt(2 * 10**(actual_SNR / 10)))
            hcerr = 1 - ((1 - pbitdrop)**7 + 7 * pbitdrop * (1 - pbitdrop)**6)
            hcf = 1 - (1 - hcerr)**3
            reeddrop = 1 - binom.cdf(int(k / 2), blocklength, hcf)
            xor_opt = xor_analysis_opt(N, reeddrop, nominal_SNR, actual_SNR,
                                       downfunc(actual_SNR), upfunc(actual_SNR))
            if 1 - xor_opt <= protocol_target:
                return (actual_SNR, nominal_SNR, downbit, upbit, xorbit,
                        downfunc(actual_SNR), upfunc(actual_SNR), reeddrop)
    # default behavior when nothing is returned (same shape as the success tuple)
    return (np.nan,) * 8
def probability_in_interval(fun_resample, gamma_lower, gamma_upper, significance_first=0.01, significance_second=0.05, batch=5, comm=MPI.COMM_SELF, print_per_batch=False, printing=True): N_test_max = 20000 vals = np.zeros((0,)) s = "gamma_lower, gamma_upper = {}, {}".format(gamma_lower, gamma_upper) while True: vals_new_samp = bootstrap(fun_resample, batch, comm=comm) vals = np.hstack([vals, vals_new_samp]) upper_bound_pval = binom.cdf(np.sum(vals), len(vals), gamma_upper) lower_bound_pval = 1 - binom.cdf(np.sum(vals)-1, len(vals), gamma_lower) s += ("\nnp.mean(vals) = {}".format(np.mean(vals)) + "\nlen(vals) = {}".format(len(vals)) + "\nupper_bound_pval = {}".format(upper_bound_pval) + "\nlower_bound_pval = {}".format(lower_bound_pval)) if upper_bound_pval < significance_first: if lower_bound_pval < significance_second: s += '\n===\nin interval\n===' print_rank0(comm, s) return 'in interval' if 1-binom.cdf(int(np.round(np.mean(vals)*N_test_max))-1, N_test_max, gamma_lower) < significance_second: batch = len(vals) continue # Expecting less than N_test_max tests to verify lower bound s += '\n===\nbelow upper bound\n===' if printing: print_rank0(comm, s) return 'below upper bound' if lower_bound_pval < significance_first: if upper_bound_pval < significance_second: s += '\n===\nin interval\n===' if printing: print_rank0(comm, s) return 'in interval' if binom.cdf(int(np.round(np.mean(vals)*N_test_max)), N_test_max, gamma_upper) < significance_second: batch = len(vals) continue # Expecting less than N_test_max tests to verify upper bound s += '\n===\nabove lower bound\n===' if printing: print_rank0(comm, s) return 'above lower bound' batch = len(vals) if print_per_batch: if printing: print_rank0(comm, s) s = "gamma_lower, gamma_upper = {}, {}".format(gamma_lower, gamma_upper)
def binomial_distribution_vectors(n, p, v_size=None):
    k = np.arange(n + 1)
    pmf = binom.pmf(k, n, p)
    cdf = binom.cdf(k, n, p)
    if (v_size is not None) and (v_size > n + 1):
        # pad on the right to reach v_size entries; the support already has
        # n+1 points, so v_size - (n + 1) entries are appended
        pmf = np.append(pmf, np.zeros(v_size - (n + 1)))
        cdf = np.append(cdf, np.ones(v_size - (n + 1)))
    return pmf, cdf
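# Usage sketch for binomial_distribution_vectors above: padding extends the
# n+1 support points out to v_size entries (illustrative sizes).
pmf, cdf = binomial_distribution_vectors(4, 0.5, v_size=8)
assert len(pmf) == 8 and len(cdf) == 8
assert abs(pmf.sum() - 1.0) < 1e-12 and cdf[-1] == 1.0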
def uBinBound(m,k,delta=0.05): """ Calculates the upper bound of the risk, using the binomial tail approach of Langford """ if k == m: return 1. else: f = lambda x: binom.cdf( k, m, x ) - delta return bisect( f, float(k)/m, 1.0 )
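# Example for uBinBound above: upper-bound the true error rate after
# observing k = 5 errors on m = 100 samples (default delta = 0.05). The bound
# is the p solving P(Binom(100, p) <= 5) = 0.05, roughly 0.10.
print(uBinBound(100, 5))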
def detect_link(self, other, threshold, loss_probability, incoming=True, nmax = 0): difference = self.difference(other) if difference < nmax: return False if incoming: received = self.second_moment() sent = received + difference else: sent = self.second_moment() received = self.second_moment() - difference return binom.cdf(received, round(sent), 1-loss_probability) < threshold
def pbinom(k, n): """ Compute cdf for binomial with prob = 0.5 compare to R pbinom :param k: :param n: :return: cumulative probability """ return binom.cdf(k, n, 0.5)
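# Example for pbinom above: a two-sided sign test with k = 3 successes out of
# n = 12 doubles the smaller tail (illustrative usage):
p_two_sided = 2 * min(pbinom(3, 12), 1 - pbinom(2, 12))
print(p_two_sided)  # approximately 0.146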
def binomialTailTest(counts, nTrials, pEvent, oneSided=True): counts = array(counts) mean = nTrials * pEvent if oneSided: result = zeros(counts.shape) isAboveMean = counts > mean aboveIdx = isAboveMean.nonzero() belowIdx = (~isAboveMean).nonzero() result[aboveIdx] = binom.sf(counts[aboveIdx]-1, nTrials, pEvent) result[belowIdx] = binom.cdf(counts[belowIdx], nTrials, pEvent) else: diffs = abs(counts-mean) result = binom.cdf(mean-diffs, nTrials, pEvent) result += binom.sf(mean+diffs-1, nTrials, pEvent) return result
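# Example for binomialTailTest above (assumes numpy's array/zeros names are
# imported at module level, as the snippet implies). For a fair coin, counts
# of 2 and 8 out of 10 each give a one-sided tail of about 0.0547.
print(binomialTailTest([2, 8], nTrials=10, pEvent=0.5))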
def prob_node_vs_community(CTC,NTC, M): I = np.diag(np.ones((len(CTC), ))) CTC = np.multiply(CTC, 1 - I) NTC = np.multiply(NTC, 1 - M) ext_in_degree = CTC.sum(0) v = NTC.sum() ext_out_degree = NTC.sum(1) P = ext_out_degree * ext_in_degree / v**2 P = 1 - binom.cdf(NTC-1,v,P) return P
def simprob(self,avector): #function to calculate binomial probability of getting sim or less by chance given widths of vectors mywidth=self.width awidth=avector.width #width = (mywidth+awidth)*0.5 n=awidth p=mywidth/WordVector.dim sim = self.linsim(avector) r= math.floor(sim*awidth) prob = binom.cdf(r,n,p) return prob
def BinomialErrors_old(nobs, Nsamp, alpha=0.16):
    """
    One sided confidence interval for a binomial test.

    If after Nsamp trials we obtain nobs trials that resulted in success,
    find c such that

    P(nobs/Nsamp < mle; theta = c) = alpha

    where theta is the success probability for each trial.

    Code stolen shamelessly from stackoverflow:
    http://stackoverflow.com/questions/13059011/is-there-any-python-function-library-for-calculate-binomial-confidence-intervals
    """
    from scipy.stats import binom

    p0 = float(nobs) / float(Nsamp)
    upper_errfcn = lambda c: binom.cdf(nobs, Nsamp, c) - alpha
    lower_errfcn = lambda c: binom.cdf(nobs, Nsamp, c) - (1.0 - alpha)
    return p0, bisect(lower_errfcn, 0, 1), bisect(upper_errfcn, 0, 1)
def calculateSDRFalseMatchError(kVal, thetaVal=20, nVal=2048, wVal=40, mVal=10, cVal=5): numCellsInUnionVal = calculateNumCellsVsK(kVal, nVal, cVal, mVal) pMatchBit = float(numCellsInUnionVal)/ (nVal * mVal) pFalseMatch = 1 - binom.cdf(thetaVal, wVal, pMatchBit) return pFalseMatch