def _significance_direct(n_on, mu_bkg): """Compute significance directly via Poisson probability. Use this method for small ``n_on < 10``. In this case the Li & Ma formula isn't correct any more. TODO: add large unit test coverage (where is it numerically precise enough)? TODO: check coverage with MC simulation I'm getting a positive significance for zero observed counts and small mu_bkg. That doesn't make too much sense ... >>> stats.poisson._significance_direct(0, 2) -1.1015196284987503 >>> stats.poisson._significance_direct(0, 0.1) 1.309617799458493 """ from scipy.stats import norm, poisson # Compute tail probability to see n_on or more counts probability = poisson.sf(n_on, mu_bkg) # Convert probability to a significance significance = norm.isf(probability) return significance
def _error(value):
    '''Construct frequentist errors using the Poisson distribution.'''
    # up error: smallest lambda for which P(n <= nobs | lambda) < (1 - 0.68268...) / 2 = 0.15865...
    # down error: largest lambda for which P(n >= nobs | lambda) < (1 - 0.68268...) / 2 = 0.15865...
    from scipy.stats import poisson

    lambda_up, lambda_down, step_size = 1.1 * value, 0.9 * value, float(value) / 10
    if value == 0:
        return (0, 1.8410216450100005)  # save time with a precomputed value
    if value < 1:
        lambda_up, lambda_down, step_size = 1.8, 0.0, 0.1
    for i in range(5):
        lambda_up -= step_size
        lambda_down += step_size
        step_size /= 10
        while poisson.cdf(value, lambda_up) > 0.15865525393145705:
            lambda_up += step_size
        while poisson.sf(value - 1, lambda_down) > 0.15865525393145705:
            lambda_down -= step_size
    return (value - lambda_down, lambda_up - value)
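# A minimal usage sketch for `_error` above (assumes it is in scope; the loop values are
# illustrative, not from the original source). For n = 0 the function returns the
# precomputed 68.27% upper limit of ~1.841 counts; for n > 0 the tail probabilities at
# the interval edges should sit near 15.87% by construction.
from scipy.stats import poisson

def _demo_error():
    assert abs(_error(0)[1] - 1.8410216450100005) < 1e-9
    for n in (1, 5, 20):
        down, up = _error(n)
        print('n=%d  -%.3f / +%.3f' % (n, down, up))
        assert abs(poisson.cdf(n, n + up) - 0.15865525393145705) < 0.01
        assert abs(poisson.sf(n - 1, n - down) - 0.15865525393145705) < 0.01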
def _significance_direct(n_on, mu_bkg):
    """Compute significance directly via Poisson probability.

    Reference: TODO (is this ever used?)
    """
    from scipy.stats import norm, poisson

    # Compute tail probability to see n_on or more counts.
    # Note that we're using ``k = n_on - 1`` to get the probability
    # for n_on included or more, because `poisson.sf(k)` returns the
    # probability for more than k, with k excluded.
    # For `n_on = 0` this returns ``probability = 1`` and hence significance ``-inf``.
    probability = poisson.sf(n_on - 1, mu_bkg)

    # Convert probability to a significance
    return norm.isf(probability)
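# Sketch of the tail-convention difference between the two `_significance_direct`
# variants above: `poisson.sf(n_on - 1, mu)` is the inclusive tail P(X >= n_on), while
# `poisson.sf(n_on, mu)` is P(X > n_on). The numbers here are illustrative only.
from scipy.stats import norm, poisson

def _demo_significance_tail_convention(n_on=5, mu_bkg=1.2):
    p_ge = poisson.sf(n_on - 1, mu_bkg)  # P(X >= n_on), as used just above
    p_gt = poisson.sf(n_on, mu_bkg)      # P(X > n_on), one event short
    print('P(X >= %d) = %.4g -> significance %.3f' % (n_on, p_ge, norm.isf(p_ge)))
    print('P(X >  %d) = %.4g -> significance %.3f' % (n_on, p_gt, norm.isf(p_gt)))
    # For n_on = 0 the inclusive tail is exactly 1, so the significance is -inf.
    assert poisson.sf(-1, mu_bkg) == 1.0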
def find_phase_range(periodic_blocks, phases, prob_2peak=0.01):
    """Find the off-pulse phase range of a pulsar light curve.

    ``periodic_blocks.xx`` and ``periodic_blocks.yy`` are the Bayesian block
    decomposition of the light curve and ``phases`` are the photon phases.
    The off-pulse phase range is defined as the lowest block with 10% removed
    from either side.
    """
    xx = periodic_blocks.xx
    yy = periodic_blocks.yy
    ranges = [PhaseRange(a, b) for a, b in zip(xx[0::2], xx[1::2])]
    heights = yy[::2]

    if np.allclose(heights[0], heights[-1]):
        ranges[0] += ranges.pop(-1)
        heights = heights[:-1]

    order = np.argsort(heights)
    min_phase = ranges[order[0]]

    if len(order) < 3:
        # if only 2 blocks, no need to merge
        phase = min_phase
    else:
        second_min_phase = ranges[order[1]]
        ncounts = len([p for p in phases if p in min_phase])
        second_ncounts = len([p for p in phases if p in second_min_phase])
        predicted_second_counts = ncounts * second_min_phase.phase_fraction / min_phase.phase_fraction
        prob = poisson.sf(second_ncounts, predicted_second_counts)
        print("Probability of there being a second peak from %s is %s" % (str(second_min_phase), prob))

        region_too_small = second_min_phase.phase_fraction < 0.5 * min_phase.phase_fraction
        height_too_different = prob < prob_2peak

        if height_too_different or region_too_small:
            if height_too_different:
                print("Rejecting second peak - heights are inconsistent")
            if region_too_small:
                print("Rejecting second peak - region too small")
            phase = min_phase
        else:
            print("Adding second peak!")
            phase = min_phase + second_min_phase

    return phase.trim(fraction=0.1)
def _significance_direct(n_observed, mu_background): """Compute significance directly via Poisson probability. Use this method for small n_observed < 10. In this case the Li & Ma formula isn't correct any more. TODO: add large unit test coverage (where is it numerically precise enough)? TODO: check coverage with MC simulation """ from scipy.stats import norm, poisson # Compute tail probability to see n_on or more counts probability = poisson.sf(n_observed, mu_background) # Convert probability to a significance significance = norm.isf(probability) return significance
def prob_return(mu_return): """ p[s, s'] = the probability of transitioning from state s to state s' after cars have been returned. Notes: Car returns are bounded by max_evening (=20). The probabilities are given by the Poisson distribution with mu = mu_return. """ prob = np.zeros((nM, nE)) for afternoon in range(nM): for returned in range(nE - afternoon): evening = afternoon + returned prob[afternoon, evening] = poisson.pmf(returned, mu_return) # Excess returns beyond what can be kept are captured by the survival function (== 1 - CDF) assert evening == max_evening prob[afternoon, evening] += poisson.sf(evening - afternoon, mu_return) assert np.isclose(prob.sum(axis=1), 1).all() return prob
def prob_request(mu_request):
    """
    p[r, s, s'] = the probability of fulfilling r rental requests and
    transitioning from state s to state s'.

    Notes:
        Rental requests are bounded by the state s. The probabilities are
        given by the Poisson distribution with mu = mu_request.
    """
    prob = np.zeros((nM, nM, nM))
    for morning in range(nM):
        for rented in range(morning + 1):
            afternoon = morning - rented
            prob[rented, morning, afternoon] = poisson.pmf(rented, mu_request)
        # Excess requests beyond what's available are captured by the survival function (== 1 - CDF)
        assert afternoon == 0
        prob[morning, morning, afternoon] += poisson.sf(morning, mu_request)
    assert np.isclose(prob.sum(axis=(0, 2)), 1).all()
    return prob
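# Both transition builders above fold the excess Poisson tail into the boundary state via
# `poisson.sf`, which is what makes each row a proper distribution. A self-contained sketch
# of that trick; `n_states` and `mu` here are illustrative, not from the original source.
import numpy as np
from scipy.stats import poisson

def truncated_poisson_row(n_states, mu):
    """Distribution over states 0..n_states-1 with the excess tail folded into the last state."""
    row = poisson.pmf(np.arange(n_states), mu)
    row[-1] += poisson.sf(n_states - 1, mu)  # adds P(X >= n_states)
    return row

assert np.isclose(truncated_poisson_row(10, 3.0).sum(), 1.0)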
def cost_function(mu, n, target):
    """
    Calculates the squared distance between the survival function (1 - cdf) of a
    Poisson distribution with rate mu evaluated at n observations and the target value.

    Arguments:
        mu {float} -- Event rate, a.k.a. rate parameter
        n {int} -- Number of observed events
        target {float} -- Estimated/desired survival rate

    Returns:
        float -- Squared distance between the survival rate and the target
    """
    return square(poisson.sf(n, mu) - target)
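# One way this kind of cost function might be used (a sketch, not from the original source):
# recover the rate mu whose upper-tail probability at n matches a target, via
# scipy.optimize.minimize_scalar. `estimate_mu` and its `upper` bound are hypothetical.
from scipy.optimize import minimize_scalar
from scipy.stats import poisson

def estimate_mu(n, target, upper=100.0):
    res = minimize_scalar(lambda mu: (poisson.sf(n, mu) - target) ** 2,
                          bounds=(1e-9, upper), method='bounded')
    return res.x

mu_hat = estimate_mu(n=10, target=0.05)
assert abs(poisson.sf(10, mu_hat) - 0.05) < 1e-3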
def test_region(
        insertions,  # type: List[Insertion]
        reference_seq,  # type: pyfaidx.Fasta
        region,  # type: Tuple[str, int, int]
        pattern=None,  # type: Optional[str]
        intervals=None,  # type: Optional[Iterable[Tuple[str, int, int]]]
        total=None,  # type: Optional[int]
        filters=None,  # type: Optional[List[Callable]]
        insertion_trees=None  # type: GenomicIntervalTree
):  # type: (...) -> float
    """Tests a given genomic region for enrichment in insertions."""

    if total is None:
        total = count_total(reference_seq, pattern=pattern, intervals=intervals)

    # Count pattern in region.
    region_count = count_region(reference_seq, region=region, pattern=pattern)

    # Sub-select insertions for region.
    if insertion_trees is None:
        insertion_trees = GenomicIntervalTree.from_objects_position(
            insertions, chrom_attr='seqname')

    region_ins = set(interval[2] for interval in insertion_trees.search(*region))

    # Apply additional filter functions to insertions if given
    # (such as filtering on gene name/id for example).
    if filters is not None:
        for filter_func in filters:
            region_ins = set(ins for ins in region_ins if filter_func(ins))

    # Calculate p-value.
    x = len(list(region_ins))
    mu = len(insertions) * (region_count / total)

    # Note here we use loc=1, because we are interested in
    # calculating P(X >= x), not P(X > x) (the default
    # survival function).
    p_val = poisson.sf(x, mu=mu, loc=1)  # type: float

    return p_val
def __init__(self): # dynamics of the MDP process for Jack-Car rental Problem self.number_of_locations = 3 self.rental_credit = 10 self.expected_rental_requests = [3, 2, 2] self.expected_rental_returns = [3, 1, 1] self.capacity = [19, 9, 9] self.max_car_moved = 5 self.gamma = 0.9 self.cost_of_moving = [2, 0, 2] # available actions : actions can be accessed through the index self.actions = self.generate_actions() # available states : available states can be accessed through the index self.states = [ i for i in itertools.product(range(self.capacity[0] + 1), range(self.capacity[1] + 1), range(self.capacity[2] + 1)) ] # initializing the values of the states self.V = np.zeros(tuple(np.array(self.capacity) + 1), dtype=np.float) # initializing the policy array self.policy = np.zeros(tuple(np.array(self.capacity) + 1), dtype=np.int) # poisson precompute self.poisson_pmf = dict() self.poisson_sf = dict() for n, lam in itertools.product( range(-1, max(self.capacity) + 1), range( max(self.expected_rental_requests + self.expected_rental_returns) + 1)): self.poisson_pmf[(n, lam)] = poisson.pmf(n, lam) self.poisson_sf[(n, lam)] = poisson.sf(n, lam) # printing the dynamics self.print_dynamics()
def consistent(x, xphase, y, yphase, probability=0.05, quiet=True):
    """Assuming x counts are observed in a phase range xphase and y counts are
    observed in phase range yphase, decides if the regions are consistent.

    The regions are consistent if the probability of obtaining as many or more
    counts in the second region compared to the first region is > `probability`
    (so there is a > 5% probability, by default, that the second region is not
    unusually large).
    """
    y_predicted = x * (yphase / xphase)
    poisson_likelihood = poisson.sf(y, y_predicted)
    if not quiet:
        print('poisson likelihood=%.2f' % poisson_likelihood)
    return poisson_likelihood > probability
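# Usage sketch for `consistent` (illustrative numbers, not from the original source):
# 40 counts in a 0.4-wide phase range predict about 10 counts in a 0.1-wide range.
print(consistent(40, 0.4, 12, 0.1, quiet=False))  # True: 12 vs. ~10 predicted is unremarkable
print(consistent(40, 0.4, 25, 0.1, quiet=False))  # False: poisson.sf(25, 10) is far below 5%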
def compute_fdr(df, total_chip_reads, total_input_reads, args): df.to_csv("for_fdr_test.csv", sep=" ") print("total_chip_reads", total_chip_reads) print("total_input_reads", total_input_reads) total_island_input_reads = df.Input.sum() # Hack needed in case we run on test data # TODO: why does SICER not need this? Different genome versions? # run with FDR=1 on original SICER and get this island: # chr7 61606400 61606799 3 2 0.167427550906 1.40719467956 0.1674275 50906 # does it not show up with epic. if total_island_input_reads == 0: total_island_input_reads = 2 scaling_factor = (total_chip_reads * 1.0) / total_input_reads effective_genome_size = get_effective_genome_length(args.genome) zero_controls_multiplier = total_input_reads * 1.0 / effective_genome_size avg_0_denom = (df.End - df.Start + 1) * zero_controls_multiplier avg_0_denom[avg_0_denom > 0.25] = 0.25 avg_0_denom = avg_0_denom * scaling_factor avg = df.Input * scaling_factor avg[df.Input == 0] = avg_0_denom[df.Input == 0] df.P_value = poisson.sf(df.ChIP, avg) no_differential_expression = df.ChIP <= avg df.loc[no_differential_expression, "P_value"] = 1 df.Fold_change = df.ChIP / avg ranked_p_values = rankdata(df.P_value) df.FDR_value = df.P_value * len(df) / ranked_p_values fdr_too_high = df.FDR_value > 1 df.loc[fdr_too_high, "FDR_value"] = 1 df = df[df.FDR_value < args.false_discovery_rate_cutoff] return df
def event_significance(self, nevents=10, rank_fcn=None): """ Calculate the Poissonian significance of the 'on source' trial set for up to the loudest nevents. """ if rank_fcn is None: rank_fcn = lambda e: e.snr offtime = float(abs(segments.segmentlist(self.offsource.keys()))) offsource = sorted(chain(*self.offsource.values()), key=lambda sb: -sb.snr) offrate = zip(offsource, map(lambda i: i / offtime, range(1, len(offsource) + 1))) offrate = offrate[::-1] offsource = offsource[::-1] offsnr = map(rank_fcn, offsource) ontime = float(abs(segments.segmentlist(self.onsource.keys()))) if ontime == 0: return [] onsource = sorted(chain(*self.onsource.values()), key=lambda sb: -sb.snr) onsnr = map(rank_fcn, onsource) onrate = [] for snr in onsnr: try: onrate.append(offrate[bisect_left(offsnr, snr)][1]) except IndexError: # on SNR > max off SNR onrate.append(0) onsource_sig = [] for i, sb in enumerate(onsource[:nevents]): # From Gaussian #exp_num = chi2.cdf(sb.chisq_dof, sb.snr)*len(onsource) # From off-source exp_num = onrate[i] * ontime # FIXME: requires scipy >= 0.10 #onsource_sig.append([sb.snr, -poisson.logsf(i, exp_num)]) onsource_sig.append( [rank_fcn(sb), -numpy.log(poisson.sf(i, exp_num))]) return onsource_sig
def pval_calculator(self, v): """ Calculate the p-value of the v-motifs number of two vertices :param v: a list containing the index of the first vertex, index of the second vertex, number of V-motifs between them. :returns: a list containing the index of the first vertex, index of the second vertex, the relative p-value. """ i = v[0] j = v[1] if self.method == 'poisson': if self.light_mode: avg_v = np.sum( v_probs_from_fitnesses(self.x[i], self.x[j], self.y)) else: avg_v = self.avg_v_mat[i, j] return i, j, poisson.sf(k=v[2] - 1, mu=avg_v) elif self.method == 'normal': if self.light_mode: probs = v_probs_from_fitnesses(self.x[i], self.x[j], self.y) else: probs = self.avg_mat[i] * self.avg_mat[j] avg_v = np.sum(probs) sigma_v = np.sqrt(np.sum(probs * (1 - probs))) return i, j, norm.cdf((v[2] + 0.5 - avg_v) / sigma_v) elif self.method == 'rna': if self.light_mode: probs = v_probs_from_fitnesses(self.x[i], self.x[j], self.y) else: probs = self.avg_mat[i] * self.avg_mat[j] avg_v = np.sum(probs) var_v_arr = probs * (1 - probs) sigma_v = np.sqrt(np.sum(var_v_arr)) gamma_v = (sigma_v**(-3)) * np.sum(var_v_arr * (1 - 2 * probs)) eval_x = (v[2] + 0.5 - avg_v) / sigma_v pval_temp = norm.cdf( eval_x) + gamma_v * (1 - eval_x**2) * norm.pdf(eval_x) / 6 if pval_temp < 0: return i, j, 0 elif pval_temp > 1: return i, j, 1 else: return i, j, pval_temp
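# The 'poisson' branch above is an exact upper tail, while the Gaussian branches use a 0.5
# continuity correction. A standalone sketch (not tied to the class above) comparing the exact
# Poisson tail with a continuity-corrected normal tail for an illustrative mean and count:
import numpy as np
from scipy.stats import norm, poisson

mu, k = 50.0, 65
exact = poisson.sf(k - 1, mu)                   # P(X >= k)
approx = norm.sf((k - 0.5 - mu) / np.sqrt(mu))  # continuity-corrected Gaussian tail
print(exact, approx)
assert 0.5 < approx / exact < 2.0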
def glscore_V3(sids, otuID, otutab): '''Write something informative here''' sids = dropNA(sids) coverages = [int(otutab.getSampleCoverage(sid)) for sid in sids] abnds = [int(otutab.getOTUabundance(otuID, sid)) for sid in sids] t0expAbnd = average([(float(abnds[i]) / coverages[i]) * coverages[0] for i in range(1, len(sids)) ]) # `max' instead of `average'? tTexpAbnds = [(float(abnds[0]) / coverages[0]) * coverages[i] for i in range(1, len(sids))] if abnds[0] == 0: lsc = 0 # If we don't expect to see much, if any, of the OTU at t0 due # to low sample coverage, then we should penalise the gain score penalty = poisson.sf(0, t0expAbnd) # ceiling(t0expAbnd, sigC) / sigC # If the OTU isn't consistently there, then it could be # an OTU that fluctuates naturally and wasn't gained prss = [ presence_sc(sids[i], sids[i + 1], otuID, otutab) for i in range(1, len(sids) - 1) ] prss = [prss[i] * prss[i + 1] for i in range(len(prss) - 1)] prs = average(prss) gsc = prs * penalty if abnds[0] > 0: gsc = 0 # I want to penalise the loss scores of anything that could # have been lost because of uneven sample coverage lscs = [ loss_sc(abnds[i + 1], tTexpAbnds[i]) for i in range(len(tTexpAbnds)) ] lscs = [lscs[i] * lscs[i + 1] for i in range(len(lscs) - 1)] lsc = average(lscs) output = add_block(sids) + add_block(coverages) + add_block(abnds) \ + add_block([t0expAbnd] + tTexpAbnds) output.append('') output.append('{0:.5f}'.format(gsc)) output.append('{0:.5f}'.format(lsc)) return output
def compute_fdr(df, total_chip_reads, total_input_reads, args): # type: (pd.DataFrame, int, int, Namespace) -> pd.DataFrame total_island_input_reads = df.Input.sum() # Hack needed in case we run on test data # TODO: why does SICER not need this? Different genome versions? # run with FDR=1 on original SICER and get this island: # chr7 61606400 61606799 3 2 0.167427550906 1.40719467956 0.1674275 50906 # does it not show up with epic? if total_island_input_reads == 0: total_island_input_reads = 2 scaling_factor = (total_chip_reads * 1.0) / total_input_reads zero_controls_multiplier = total_input_reads * 1.0 / args.effective_genome_fraction avg_0_denom = (df.End - df.Start + 1) * zero_controls_multiplier avg_0_denom[avg_0_denom > 0.25] = 0.25 avg_0_denom = avg_0_denom * scaling_factor avg = df.Input * scaling_factor avg[df.Input == 0] = avg_0_denom[df.Input == 0] fold_change = df.ChIP / avg log2FC = log2(fold_change) df.insert(len(df.columns), "Log2FC", log2FC) p_vals = pd.Series(poisson.sf(df.ChIP, avg), index=df.index) p_vals[df.ChIP <= avg] = 1 df.insert(len(df.columns), "P", p_vals) ranked_p_values = rankdata(p_vals) fdr = p_vals * len(df) / ranked_p_values fdr[fdr > 1] = 1 df.insert(len(df.columns), "FDR", fdr) df = df[df.FDR < args.false_discovery_rate_cutoff] return df
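# The FDR step above is a Benjamini-Hochberg-style adjustment built from `rankdata`.
# A minimal standalone sketch on a toy p-value vector (illustrative data); note that,
# like the code above, it does not enforce the monotonicity step of the full BH procedure.
import numpy as np
from scipy.stats import rankdata

def bh_style_adjust(p_vals):
    p_vals = np.asarray(p_vals, dtype=float)
    fdr = p_vals * len(p_vals) / rankdata(p_vals)
    fdr[fdr > 1] = 1
    return fdr

print(bh_style_adjust([0.001, 0.01, 0.02, 0.8]))  # -> approx [0.004, 0.02, 0.0267, 0.8]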
def get_pvals_chunk(counts_series_lchunk): """ Parameters: ----------- counts_series_lchunk : pd.Series(int) Series of raw pixel counts where the name of the Series is pd.Interval of the lambda-bin where the pixel belong. I.e. counts_series_lchunk.name.right - is the upper limit of the chunk and is used as "expected" in Poisson distribution to estimate p-value. Returns: -------- pvals: ndarray[float] array of p-values for each pixel Notes: ------ poisson.sf = 1.0 - poisson.cdf """ return poisson.sf(counts_series_lchunk.values, counts_series_lchunk.name.right)
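# Usage sketch for `get_pvals_chunk` (illustrative data): a Series of pixel counts whose
# name is the lambda-bin pd.Interval, exactly as the docstring describes.
import numpy as np
import pandas as pd
from scipy.stats import poisson

counts = pd.Series([3, 7, 12], name=pd.Interval(2.0, 5.0))
pvals = get_pvals_chunk(counts)
# By definition poisson.sf = 1 - poisson.cdf, with mu taken from the interval's right edge.
assert np.allclose(pvals, 1.0 - poisson.cdf(counts.values, 5.0))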
def estSigOneChr(rs, jdf, pre, dis=0, win=5): """ Estimating the significances for the loops in one chromosome. """ #all variables with suffix t is treatment, with suffix c in control logger.info("Building genomic coverage model for %s" % jdf) model, N = getGenomeCoverage(jdf, dis) ds = {} i = 0 for key, r in rs.items(): i += 1 if i % 100 == 0: report = "Estimating %s loops for %s" % (i, pre) cFlush(report) chrom = r[0] iva = [r[1], r[2]] ivb = [r[4], r[5]] ra, rb, rab = getPETsforRegions(iva, ivb, model) ivas, ivbs = getNearbyPairRegions(iva, ivb, win=win) mrab = getPermutatedBg(ivas, ivbs, model) if mrab > 0: es = rab / mrab else: es = 100 pop = max([1e-300, poisson.sf(rab - 1.0, mrab)]) niva = "%s:%s-%s" % (chrom, iva[0], iva[1]) nivb = "%s:%s-%s" % (chrom, ivb[0], ivb[1]) ds[key] = { "iva": niva, "ivb": nivb, "ra": ra, "rb": rb, "rab": rab, "ES": es, "poisson_p-value": pop, } if len(ds) == 0: return None ds = pd.DataFrame(ds).T return ds
def test_enrich(expected, observed, columns): """ tests whether genes are enriched with de novo mutations Args: expected: pandas dataframe of expected numbers of mutations per gene, given expected mutation rates for each gene. observed: pandas data frame with tally of de novo mutations per gene for each of the mutation types: lof_snv, lof_indel, missense_snv, missense_indel. columns: list of columns to use to calculate enrichment within, such as the loss-of-function columns ["lof_snv", "lof_indel"]. Returns: pandas Series of P-values from testing for enrichment. """ # recode the columns in the expected mutations table, so merging the # expected and observed datasets doesn't have conflicting column names. expected_columns = [ x + "_expected" for x in columns ] rename = dict(zip(columns, expected_columns)) expected = expected.rename(columns=rename) if 'hgnc' not in observed: observed['hgnc'] = observed['symbol'] enriched = observed.merge(expected, how="left", on=["hgnc", "chrom"]) # account for how different pandas versions sum series with only NA kwargs = {} if pandas.__version__ >= '0.22.0': kwargs = {'min_count': 1} # sum the observed and expected de novo mutations per gene observed = enriched[columns].sum(axis=1, **kwargs) expected = enriched[expected_columns].sum(axis=1, **kwargs) # calculate the probability of getting the observed number of de novos, # given the expected rate of mutations. return poisson.sf(observed - 1, expected)
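# The final line above follows the usual enrichment convention: `poisson.sf(observed - 1, expected)`
# is P(X >= observed) under the expected mutation rate. A tiny numeric sketch with illustrative values:
from scipy.stats import poisson

observed, expected = 6, 1.5
p_enrich = poisson.sf(observed - 1, expected)  # P(X >= 6 | mu = 1.5)
print('enrichment p-value: %.3g' % p_enrich)
# Same tail written as an explicit sum of pmf terms (truncated far out in the tail).
assert abs(p_enrich - poisson.pmf(range(6, 60), 1.5).sum()) < 1e-12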
def compute_small_belief(c, m, l): mu = l * m csd = c * (l * m)**0.5 mu_all_but_one = l * (m - 1) max_text = 10000 delta1 = poisson.sf(mu_all_but_one + csd, mu_all_but_one) delta2 = poisson.cdf(mu_all_but_one - csd, mu_all_but_one) #delta1 = poisson.pmf(l-1,l) * poisson.sf(mu_all_but_one + csd, mu_all_but_one) #for i in xrange(1,max_text): # delta1 += poisson.pmf(l + i,l) * poisson.pmf(mu_all_but_one + csd - i, mu_all_but_one) #delta1 += poisson.pmf(l + max_text,l) # #delta2 = poisson.pmf(0,l) * poisson.sf(mu + csd, mu_all_but_one) #for i in xrange(1,max_text): # delta2 += poisson.pmf(l + i,l) * poisson.pmf(mu + csd - i, mu_all_but_one) #delta2 += poisson.pmf(l + max_text,l) delta = (delta1 + delta2) epsilon = math.log(1 + (float(c) / (m * l)**0.5)) return epsilon, delta
def countTEs(f, repf, fout, psedo=1, ext=5):
    """
    Count reads located in TEs and get their enrichment.
    """
    t, model = getCov(f)
    reps = pd.read_table(repf, index_col=0, sep="\t")
    ds = {}
    for rep in tqdm(list(reps.itertuples())):
        rid = rep[0]
        iv = HTSeq.GenomicInterval(rep[1], rep[2], rep[3])
        c, rpkm = getCount(t, model, iv)
        if c == 0:
            continue
        upiv = HTSeq.GenomicInterval(rep[1], rep[2] - iv.length * ext, rep[2])
        upc, uprpkm = getCount(t, model, upiv)
        downiv = HTSeq.GenomicInterval(rep[1], rep[3], rep[3] + iv.length * ext)
        downc, downrpkm = getCount(t, model, downiv)
        if upc + downc > 0:
            es = c / 1.0 / (upc + downc) * 2 * ext
            p = max([1e-300, poisson.sf(c, (upc + downc) / 2.0 / ext)])
        else:
            es = c / 1.0 / psedo
            p = 1e-300
        ds[rid] = {
            "length": iv.length,
            "count": c,
            "RPKM": rpkm,
            "up_count_ext%s" % ext: upc,
            "up_RPKM_ext%s" % ext: uprpkm,
            "down_count_ext%s" % ext: downc,
            "down_RPKM_ext%s" % ext: downrpkm,
            "ES": es,
            "poisson_p-value": p,
            #"ES": rpkm / 1.0 /
            #(uprpkm + downrpkm + psedo) * 2,  #psedo count to avoid divide by zero
        }
    ds = pd.DataFrame(ds).T
    ds.to_csv(fout + ".txt", sep="\t")
def stestat(detector,background): stat = OrderedDict() stat['Name'] = detector.name stat['actual sigma +'] = detector.actualsig_pos stat['actual sigma -'] = detector.actualsig_neg stat['Alarm Setting'] = detector.alarm stat['Micro Rad Per Hr'] = ( (100*detector.t_energy*conv) / (detector.mass_kg*(detector.source_time/3600)) )*(10**6) stat['Sigma Rad'] = ((100*detector.sig_energy*conv) / (detector.mass_kg*(detector.source_time/3600)) )*(10**6) stat['Source Hits Per Sec'] = detector.rate stat['Source Sigma Hit Rate'] = np.sqrt(detector.counts)/detector.source_time stat['Background Hit Rate'] = background.rate stat['Sigma Background Hit Rate'] = background.sig_counts stat['Combined Hit Rate'] = (detector.rate) + (background.rate) stat['Sigma Combined Hit Rate'] = np.sqrt( stat['Source Sigma Hit Rate']**2 + (background.sig_counts)**2 ) stat['Sigma Above Background'] = detector.rate*detector.itime / background.sig_counts stat['Hits Required to alarm'] = stat['Sigma Background Hit Rate']*stat['Alarm Setting'] mean = stat['Combined Hit Rate'] sigma = stat['Sigma Combined Hit Rate'] alarm = background.sig_counts*detector.alarm # value needed to set off alarm stat['Probability to Alarm'] = poisson.sf(alarm,mean) return stat
def enrichment(observed, expected): ''' assess enrichment of de novo mutations ''' groups = { 'PTV': ['lof_snv', 'lof_indel'], 'PAV': ['missense_snv', 'missense_indel'] } data = {} for x in groups: obs = sum([observed[x].sum() for x in groups[x]]) exp = sum([expected[x].sum() for x in groups[x]]) ratio = obs / exp p_value = poisson.sf(obs - 1, exp) data[x] = { 'ratio': ratio, 'p_value': p_value, 'observed': obs, 'expected': exp } return data
def _significance_direct(n_on, mu_bkg): """Compute significance directly via Poisson probability. Use this method for small ``n_on < 10``. In this case the Li & Ma formula isn't correct any more. TODO: add large unit test coverage (where is it numerically precise enough)? TODO: check coverage with MC simulation I'm getting a positive significance for zero observed counts and small mu_bkg. That doesn't make too much sense ... >>> stats.poisson._significance_direct(0, 2) -1.1015196284987503 >>> stats.poisson._significance_direct(0, 0.1) 1.309617799458493 """ # Compute tail probability to see n_on or more counts probability = poisson.sf(n_on, mu_bkg) # Convert probability to a significance significance = norm.isf(probability) return significance
def ministat(detector, background): stat = OrderedDict() stat['Name'] = detector.name stat['Alarm Setting'] = detector.alarm stat['Actual Distances'] = detector.actualdist stat['Actual Probability'] = detector.actualprob stat['Actual Sigma +'] = detector.actualsig_pos stat['Actual Sigma -'] = detector.actualsig_neg stat['Micro Rad Per Hr'] = ( (100*detector.t_energy*conv) / (detector.mass_kg*(detector.source_time/3600)) )*(10**6) stat['Sigma Rad'] = ((100*detector.sig_energy*conv) / (detector.mass_kg*(detector.source_time/3600)) )*(10**6) stat['Source Hits Per Sec'] = detector.rate stat['Source Sigma Hit Rate'] = np.sqrt(detector.counts)/detector.source_time stat['Background Hit Rate'] = background.rate stat['Sigma Background Hit Rate'] = background.sig_counts stat['Combined Hit Rate'] = (detector.rate) + (background.rate) stat['Sigma Combined Hit Rate'] = np.sqrt( stat['Source Sigma Hit Rate']**2 + (background.sig_counts)**2 ) stat['Base Alarm Count'] = background.base_alarm stat['Rate Above Base Alarm Rate'] = stat['Combined Hit Rate'] - stat['Base Alarm Count'] stat['alarm level'] = detector.alarm_lv(background) mean = stat['Combined Hit Rate'] alarm = stat['Base Alarm Count'] # value needed to set off alarm stat['Probability to Alarm'] = poisson.sf(alarm,mean) return stat
def cdf(a, r, mu): if (a > 0): return poisson.sf(r - 1, a / mu) elif (a == 0): return 0
def process_one_replicon(rep_name, replicon_length, masked_bases, start_positions, window_size, out_dir): random.shuffle(start_positions) start_position_count = len(start_positions) // 2 start_positions = start_positions[:start_position_count] read_starts_per_window = get_read_starts_per_window( replicon_length, window_size, start_positions) all_read_starts = [] for window, read_starts in read_starts_per_window.items(): masked_window = False for i in range(window, window + window_size): if i in masked_bases: masked_window = True if not masked_window: all_read_starts.append(read_starts) mean_read_starts_per_window = statistics.mean(all_read_starts) sig_threshold = 0.05 / len(all_read_starts) neg_log10_sig_threshold = -math.log10(sig_threshold) print( f'{rep_name}\t{mean_read_starts_per_window}\t{sig_threshold}\t{neg_log10_sig_threshold}' ) out_filename = out_dir / (rep_name + '.tsv') with open(out_filename, 'wt') as out_file: out_file.write( 'window\tread_starts\tp_val\tneg_log10_p_val\tsigned_neg_log10_p_val\n' ) for window, read_starts in read_starts_per_window.items(): masked_window = False for i in range(window, window + window_size): if i in masked_bases: masked_window = True # High numbers of read-starts if read_starts > mean_read_starts_per_window: p_val = poisson.sf(read_starts - 1, mean_read_starts_per_window) try: neg_log10_p_val = -math.log10(p_val) signed_neg_log10_p_val = neg_log10_p_val except ValueError: neg_log10_p_val = 'inf' signed_neg_log10_p_val = 'inf' # Low numbers of read-starts else: p_val = poisson.cdf(read_starts, mean_read_starts_per_window) try: neg_log10_p_val = -math.log10(p_val) signed_neg_log10_p_val = -neg_log10_p_val except ValueError: neg_log10_p_val = 'inf' signed_neg_log10_p_val = '-inf' if not masked_window: out_file.write( f'{window}\t{read_starts}\t{p_val}\t{neg_log10_p_val}\t{signed_neg_log10_p_val}\n' ) else: out_file.write(f'{window}\t{read_starts}\tn/a\tn/a\tn/a\n')
plt.ylabel('Probability') ax = plt.gca() line_top = ax.get_ylim()[1] format_axis(ax) ax.yaxis.set_ticks_position('none') ax.set_yticklabels([]) ax.yaxis.labelpad = 0 plt.vlines(min_pore_size, 0, line_top, linestyle='--') plt.ylim([0, line_top]) plt.subplots_adjust(left=0.12, bottom=0.17) # Now make inset plot showing dye release at different average Bax/lipo # ratios: ax = plt.axes([0.55, 0.55, 0.3, 0.3]) format_axis(ax) bax_ratios2 = np.linspace(0, 20, 50) plt.plot(bax_ratios2, poisson.sf(sub_pore_size, bax_ratios2), color='k') for br in bax_ratios: plt.plot(br, poisson.sf(sub_pore_size, br), marker='o', markersize=4) ax.set_yticks([0, 0.5, 1.0]) plt.ylim([-0.08, 1.05]) plt.xlabel(r'$\langle$Bax/Lipo$\rangle$') plt.ylabel('Max release') # Save the plot plt.savefig('poisson_bax_fmax.pdf') # Now, plot best fit of 140311 Fmax curve with Poisson funcs (fmax_arr, conc_list) = get_twoexp_fmax_arr() fmax_means = np.mean(fmax_arr, axis=0) bax_ratios = conc_list / 5.16 log_ratios = np.log10(bax_ratios) fmax_arr[:, 0] = [0, 0, 0]
def erlang_cdf(a, k, mu): if (a > 0): return(poisson.sf(k = k - 1, mu = a / mu)) else: return(0)
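# The identity used by `erlang_cdf` above (the Erlang CDF written via the Poisson survival
# function) can be cross-checked against scipy.stats.erlang. A sketch, assuming `erlang_cdf`
# is in scope and reading its arguments as value `a`, integer shape `k` and scale `mu`:
from scipy.stats import erlang

a_val, k, mu = 4.0, 3, 1.5
assert abs(erlang_cdf(a_val, k, mu) - erlang.cdf(a_val, k, scale=mu)) < 1e-9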
def test_logpmf_zero(self): poisson_logpmf = poisson.logpmf(2, 2) - np.log(poisson.sf(0, 2)) tpoisson_logpmf = truncatedpoisson.logpmf(2, 2, 0) assert_allclose(poisson_logpmf, tpoisson_logpmf, rtol=1e-7)
def fl_cycle_application_decision(): """use the temporary req_join endpoint to mockup: - reject if worker does not satisfy 'minimum_upload_speed' and/or 'minimum_download_speed' - is a part of current or recent cycle according to 'do_not_reuse_workers_until_cycle' - selects according to pool_selection - is under max worker (with some padding to account for expected percent of workers so do not report successfully) """ # parse query strings (for now), eventually this will be parsed from the request body model_id = request.args.get("model_id") up_speed = request.args.get("up_speed") down_speed = request.args.get("down_speed") worker_id = request.args.get("worker_id") worker_ping = request.args.get("ping") _cycle = cycle_manager.last(model_id) _accept = False """ MVP variable stubs: we will stub these with hard coded numbers first, then make functions to dynaically query/update in subsquent PRs """ # this will be replaced with a function that check for the same (model_id, version_#) tuple when the worker last participated last_participation = 1 # how late is too late into the cycle time to give a worker "new work", if only 5 seconds left probably don't bother, set this intelligently later MINIMUM_CYCLE_TIME_LEFT = 500 # the historical amount of workers that fail to report (out of time, offline, too slow etc...), # could be modified to be worker/model specific later, track across overall pygrid instance for now EXPECTED_FAILURE_RATE = 0.2 dummy_server_config = { "max_workers": 100, "pool_selection": "random", # or "iterate" "num_cycles": 5, "do_not_reuse_workers_until_cycle": 4, "cycle_length": 8 * 60 * 60, # 8 hours "minimum_upload_speed": 2000, # 2 mbps "minimum_download_speed": 4000, # 4 mbps } """ end of variable stubs """ _server_config = dummy_server_config up_speed_check = up_speed > _server_config["minimum_upload_speed"] down_speed_check = down_speed > _server_config["minimum_download_speed"] cycle_valid_check = (( last_participation + _server_config["do_not_reuse_workers_until_cycle"] >= _cycle.get( "cycle_sequence", 99999) # this should reuturn current cycle sequence number ) * (_cycle.get("cycle_sequence", 99999) <= _server_config["num_cycles"]) * (_cycle.cycle_time > MINIMUM_CYCLE_TIME_LEFT) * (worker_id not in _cycle._workers)) if up_speed_check * down_speed_check * cycle_valid_check: if _server_config["pool_selection"] == "iterate" and len( _cycle._workers) < _server_config["max_workers"] * ( 1 + EXPECTED_FAILURE_RATE): """first come first serve selection mode.""" _accept = True elif _server_config["pool_selection"] == "random": """probabilistic model for rejection rate: - model the rate of worker's request to join as lambda in a poisson process - set probabilistic reject rate such that we can expect enough workers will request to join and be accepted - between now and ETA till end of _server_config['cycle_length'] - such that we can expect (,say with 95% confidence) successful completion of the cycle - while accounting for EXPECTED_FAILURE_RATE (% of workers that join cycle but never successfully report diff) EXPECTED_FAILURE_RATE = moving average with exponential decay based on historical data (maybe: noised up weights for security) k' = max_workers * (1+EXPECTED_FAILURE_RATE) # expected failure adjusted max_workers = var: k_prime T_left = T_cycle_end - T_now # how much time is left (in the same unit as below) normalized_lambda_actual = (recent) historical rate of request / unit time lambda' = number of requests / unit of time that would satisfy the below equation probability 
of receiving at least k' requests per unit time: P(K>=k') = 0.95 = e ^ ( - lambda' * T_left) * ( lambda' * T_left) ^ k' / k'! = 1 - P(K<k') var: lambda_approx = lambda' * T_left solve for lambda': use numerical approximation (newton's method) or just repeatedly call prob = poisson.sf(x, lambda') via scipy reject_probability = 1 - lambda_approx / (normalized_lambda_actual * T_left) """ # time base units = 1 hr, assumes lambda_actual and lambda_approx have the same unit as T_left k_prime = _server_config["max_workers"] * (1 + EXPECTED_FAILURE_RATE) T_left = _cycle.get("cycle_time", 0) # TODO: remove magic number = 5 below... see block comment above re: how normalized_lambda_actual = 5 lambda_actual = ( normalized_lambda_actual * T_left ) # makes lambda_actual have same unit as lambda_approx # @hyperparam: valid_range => (0, 1) | (+) => more certainty to have completed cycle, (-) => more efficient use of worker as computational resource confidence = 0.95 # P(K>=k') pois = lambda l: poisson.sf(k_prime, l) - confidence """ _bisect_approximator because: - solving for lambda given P(K>=k') has no algebraic solution (that I know of) => need approxmiation - scipy's optimizers are not stable for this problem (I tested a few) => need custom approxmiation - at this MVP stage we are not likely to experince performance problems, binary search is log(N) refactor notes: - implmenting a smarter approximiator using lambert's W or newton's methods will take more time - if we do need to scale then we can refactor to the above ^ """ # @hyperparam: valid_range => (0, 1) | (+) => get a faster but lower quality approximation _search_tolerance = 0.01 def _bisect_approximator(arr, search_tolerance=_search_tolerance): """uses binary search to find lambda_actual within search_tolerance.""" n = len(arr) L = 0 R = n - 1 while L <= R: mid = floor((L + R) / 2) if pois(arr[mid]) > 0 and pois( arr[mid]) < search_tolerance: return mid elif pois(arr[mid]) > 0 and pois( arr[mid]) > search_tolerance: R = mid - 1 else: L = mid + 1 return None """ if the number of workers is relatively small: - approximiation methods is not neccessary / we can find exact solution fast - and search_tolerance is not guaranteed because lambda has to be int() """ if k_prime < 50: lambda_approx = np.argmin( [abs(pois(x)) for x in range(floor(k_prime * 3))]) else: lambda_approx = _bisect_approximator(range(floor(k_prime * 3))) rej_prob = ( (1 - lambda_approx / lambda_actual) if lambda_actual > lambda_approx else 0 # don't reject if we expect to be short on worker requests ) # additional security: if (k_prime > 50 and abs(poisson.sf(k_prime, lambda_approx) - confidence) > _search_tolerance): """something went wrong, fall back to safe default.""" rej_prob = 0.1 WARN = "_bisect_approximator failed unexpectedly, reset rej_prob to default" logging.exception(WARN) # log error if random.random_sample() < rej_prob: _accept = True if _accept: return Response( json.dumps({"status": "accepted" }), # leave out other accpet keys/values for now status=200, mimetype="application/json", ) # reject by default return Response( json.dumps({"status": "rejected"}), # leave out other accpet keys/values for now status=400, mimetype="application/json", )
def call_peaks(foreground_read_counts, total_foreground_reads, background_read_counts, total_background_reads, bin_size, p_value_extend, q_value_seed, min_gap, min_expected_reads, use_broad_window_for_background=False): SHORT_WINDOW = max(1, 500 / bin_size) # 1 kb / 2 MEDIUM_WINDOW = max(1, 2500 / bin_size) # 5 kb / 2 LONG_WINDOW = max(1, 10000 / bin_size) # 20 kb / 2 if use_broad_window_for_background: background_read_counts = foreground_read_counts total_background_reads = total_foreground_reads LONG_WINDOW = max(1, 25000 / bin_size) # 50 kb / 2 pseudo_one_read = float( min_expected_reads * total_background_reads) / total_foreground_reads n_total_bins = sum( len(bins) for bins in foreground_read_counts.itervalues()) mean_background_reads = float(total_background_reads) / n_total_bins expected_read_counts = dict((c, [0] * len(foreground_read_counts[c])) for c in foreground_read_counts) if total_background_reads == 0: echo('Using average reads per bin as expected:', total_foreground_reads / float(n_total_bins)) peaks = {} poisson_cache = {} echo('Calling significant bins') for chrom in foreground_read_counts: peaks[chrom] = [0] * len(foreground_read_counts[chrom]) short_window = sum(background_read_counts[chrom][:SHORT_WINDOW]) short_window_length = SHORT_WINDOW medium_window = sum(background_read_counts[chrom][:MEDIUM_WINDOW]) medium_window_length = MEDIUM_WINDOW long_window = sum(background_read_counts[chrom][:LONG_WINDOW]) long_window_length = LONG_WINDOW for bin_idx in xrange(len(foreground_read_counts[chrom])): fgr_reads = foreground_read_counts[chrom][bin_idx] if bin_idx >= SHORT_WINDOW: short_window -= background_read_counts[chrom][bin_idx - SHORT_WINDOW] else: short_window_length += 1 if bin_idx + SHORT_WINDOW < len(background_read_counts[chrom]): short_window += background_read_counts[chrom][bin_idx + SHORT_WINDOW] else: short_window_length -= 1 if bin_idx >= MEDIUM_WINDOW: medium_window -= background_read_counts[chrom][bin_idx - MEDIUM_WINDOW] else: medium_window_length += 1 if bin_idx + MEDIUM_WINDOW < len(background_read_counts[chrom]): medium_window += background_read_counts[chrom][bin_idx + MEDIUM_WINDOW] else: medium_window_length -= 1 if bin_idx >= LONG_WINDOW: long_window -= background_read_counts[chrom][bin_idx - LONG_WINDOW] else: long_window_length += 1 if bin_idx + LONG_WINDOW < len(background_read_counts[chrom]): long_window += background_read_counts[chrom][bin_idx + LONG_WINDOW] else: long_window_length -= 1 if use_broad_window_for_background: bgr_reads = max( float(long_window) / long_window_length, mean_background_reads, pseudo_one_read) expected_reads = total_foreground_reads * bgr_reads / float( total_background_reads) else: if total_background_reads > 0: bgr_reads = max( float(short_window) / short_window_length, float(medium_window) / medium_window_length, float(long_window) / long_window_length, mean_background_reads, pseudo_one_read) expected_reads = total_foreground_reads * bgr_reads / float( total_background_reads) else: expected_reads = max( 1., total_foreground_reads / float(n_total_bins)) # cache the Poisson test key = (fgr_reads - 1, expected_reads) if key not in poisson_cache: poisson_cache[key] = poisson.sf(fgr_reads - 1, mu=expected_reads) peaks[chrom][bin_idx] = poisson_cache[key] expected_read_counts[chrom][bin_idx] = expected_reads echo('Computing p-value threshold at FDR of', q_value_seed) sorted_p_values = sorted([p for chrom in peaks for p in peaks[chrom]]) n = len(sorted_p_values) q_value_strong = None for i, p_value in 
enumerate(sorted_p_values): if float(n * p_value) / (i + 1) <= q_value_seed: q_value_strong = p_value echo('p-value threshold:', q_value_strong) if q_value_strong is None: echo( 'ERROR: No significant peaks are found for this time point!\n' 'Please, check your data and consider removing this time point or ' 'relaxing the FDR threshold with the --q-value-seed option.') exit(1) merged_peaks = {} for chrom in peaks: chrom_peaks = peaks[chrom] peak_bins = [] in_peak = False peak_start = None n_bins = len(peaks[chrom]) for bin_idx in xrange(n_bins): is_significant = (chrom_peaks[bin_idx] <= q_value_strong) if not in_peak and is_significant: in_peak = True peak_start = bin_idx if (not is_significant or bin_idx == n_bins - 1) and in_peak: peak_bins.append([peak_start, bin_idx]) in_peak = False for peak_idx in xrange(len(peak_bins)): peak_start, peak_end = peak_bins[peak_idx] boundary = peak_start while boundary >= 0 and chrom_peaks[boundary] <= p_value_extend: boundary -= 1 peak_start = boundary + 1 boundary = peak_end while boundary < n_bins and chrom_peaks[boundary] <= p_value_extend: boundary += 1 peak_end = boundary peak_bins[peak_idx] = [peak_start, peak_end] merged_peaks[chrom] = merge_intervals(peak_bins, min_gap=min_gap) return merged_peaks, expected_read_counts
def __call__(self, m): #lower_limits = np.ceil(m[:,None]*self.Ls[None,:]/self.Lavg)-1+0.1 #return (poisson.sf(lower_limits, self.expected_ns[None,:])).sum(axis=1) lower_limits = np.ceil(m[:,None]*self.Ls[None,:]/self.Lavg)-2+0.1 return (poisson.sf(lower_limits, self.expected_ns[None,:])*self.ps[None,:]).sum(axis=1)
]) mu = np.mean(obs) print('mu = {}'.format(mu)) # Show the distribution sns.set(style="white", palette="muted", color_codes=True) fig, ax = plt.subplots(figsize=(14, 7), frameon=False) sns.distplot(obs, kde=True, color="b", ax=ax) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) plt.show() # Print some probabilities print('P(more than 8 trains) = {}'.format(poisson.sf(8, mu))) print('P(more than 9 trains) = {}'.format(poisson.sf(9, mu))) print('P(more than 10 trains) = {}'.format(poisson.sf(10, mu))) print('P(more than 11 trains) = {}'.format(poisson.sf(11, mu))) # Add new observations new_obs = np.array([ 13, 14, 11, 10, 11, 13, 13, 9, 11, 14, 12, 11, 12, 14, 8, 13, 10, 14, 12, 13, 10, 9, 14, 13, 11, 14, 13, 14 ]) obs = np.concatenate([obs, new_obs]) mu = np.mean(obs) print('mu = {}'.format(mu))
def LoopSEDLikelihood(name, observed, tpl_dnde_log, tpl_index, eref, ra, dec, king, livetime, suffix, nside, redshift, addregular, dnde_pri_min, dnde_pri_max, idx_pri_min, idx_pri_max, binw=0.294, enr_min=4.00, nbin=4, t_start=770., t_stop=8233., nmaxevt_ebin=20): #t_stop will be included in the time window. TPL_CLASS = ('CalOnlyR100',) #, 'CalOnlyR30', 'CalOnlyR10') NREBIN = 1 #SCALE_FLUX = 1.0e-13 prob_skip = 1.0E-5 FILE_IN = ROOT.TFile(observed, 'READ') HTG_OBS = FILE_IN.Get('spectrum_observed') print HTG_OBS, 'has been found.' HTG_OBS_ENR = HTG_OBS.ProjectionY('{0}_projEnr'.format(HTG_OBS.GetName()), HTG_OBS.GetXaxis().FindBin(t_start), HTG_OBS.GetXaxis().FindBin(t_stop)) nobs = HTG_OBS_ENR.Integral(HTG_OBS_ENR.GetXaxis().FindBin(enr_min), HTG_OBS_ENR.GetXaxis().FindBin(enr_min+binw*nbin)) print nobs, 'observed events.' HTG_OBS_ENR.Rebin(NREBIN) HTG_OBS_ENR.SetLineWidth(0) HTG_OBS_ENR.SetLineColor(ROOT.kRed) HTG_OBS_ENR.SetMarkerColor(ROOT.kRed) HTG_OBS_ENR.SetMarkerStyle(20) HTG_OBS_ENR.SetFillStyle(0) PATH_FILE_OUT = 'LoopLikelihood_{0}{1}'.format(name, suffix) FILE_OUT = ROOT.TFile('{0}.root'.format(PATH_FILE_OUT), 'RECREATE') FILE_OUT.cd() # Histogram for results #xaxis = array('d', tpl_dnde) xaxis = np.array(tpl_dnde_log+(2.*tpl_dnde_log[-1]-tpl_dnde_log[-2],), dtype=float) #xaxis_scaled = xaxis/SCALE_FLUX #yaxis = array('d', tpl_index) yaxis = np.array(tpl_index+(2.*tpl_index[-1]-tpl_index[-2],), dtype=float) dct_htg_likeresult = {} dct_htg_likeratio = {} dct_htg_likerfrac = {} dct_htg_unlikerfrac = {} dct_htg_likecoverage = {} dct_cvs_likeresult = {} dct_cvs_likeratio = {} likelihood_max = {} xlocmax = {} ylocmax = {} for cla in TPL_CLASS: #dct_htg_likeresult[cla] = ROOT.TH2D('htg_likeresult', 'Likelihood;log_{{10}}dN/dE at {0:1.2e} MeV;PWL index'.format(eref), len(tpl_dnde_log), xaxis, len(tpl_index), yaxis) dct_htg_likeresult[cla] = ROOT.TGraph2D() dct_htg_likeresult[cla].SetName('htg_likeresult') dct_htg_likeresult[cla].SetTitle('Likelihood for data') dct_htg_likeresult[cla].GetXaxis().SetTitle('log_{{10}}dN/dE at {0:1.2e} MeV') dct_htg_likeresult[cla].GetYaxis().SetTitle('PWL index'.format(eref)) # dct_htg_likeratio[cla] = ROOT.TH2D('htg_likeratio', 'Likelihood Ratio;log_{{10}}dN/dE at {0:1.2e} MeV;PWL index'.format(eref), len(tpl_dnde_log), xaxis, len(tpl_index), yaxis) dct_htg_likeratio[cla] = ROOT.TGraph2D() dct_htg_likeratio[cla].SetName('htg_likeratio') dct_htg_likeratio[cla].SetTitle('Likelihood ratio with physically possible ideal case') dct_htg_likeratio[cla].GetXaxis().SetTitle('log_{{10}}dN/dE at {0:1.2e} MeV') dct_htg_likeratio[cla].GetYaxis().SetTitle('PWL index'.format(eref)) dct_htg_likerfrac[cla] = ROOT.TGraph2D() dct_htg_likerfrac[cla].SetName('htg_likerfrac') dct_htg_likerfrac[cla].SetTitle('Fraction of cases liker than data') dct_htg_likerfrac[cla].GetXaxis().SetTitle('log_{{10}}dN/dE at {0:1.2e} MeV') dct_htg_likerfrac[cla].GetYaxis().SetTitle('PWL index'.format(eref)) dct_htg_unlikerfrac[cla] = ROOT.TGraph2D() dct_htg_unlikerfrac[cla].SetName('htg_unlikerfrac') dct_htg_unlikerfrac[cla].SetTitle('Fraction of cases unliker than data') dct_htg_unlikerfrac[cla].GetXaxis().SetTitle('log_{{10}}dN/dE at {0:1.2e} MeV') dct_htg_unlikerfrac[cla].GetYaxis().SetTitle('PWL index'.format(eref)) dct_htg_likecoverage[cla] = ROOT.TGraph2D() dct_htg_likecoverage[cla].SetName('htg_likecoverage') dct_htg_likecoverage[cla].SetTitle('Fraction of cases covered by calculation') dct_htg_likecoverage[cla].GetXaxis().SetTitle('log_{{10}}dN/dE at {0:1.2e} MeV') 
dct_htg_likecoverage[cla].GetYaxis().SetTitle('PWL index'.format(eref)) dct_cvs_likeresult[cla] = ROOT.TCanvas('cvs_likeresult_{0}'.format(cla), '{0} Likelihood'.format(cla), 750, 750) dct_cvs_likeratio[cla] = ROOT.TCanvas('cvs_likeratio_{0}'.format(cla), '{0} Likelihood Ratio'.format(cla), 750, 750) likelihood_max[cla] = 0.0 xlocmax[cla] = 0.0 ylocmax[cla] = 0.0 likelihood_ceil = math.exp(-HTG_OBS_ENR.Integral()) for ienr in range(1, HTG_OBS_ENR.GetNbinsX()+1): ni = HTG_OBS_ENR.GetBinContent(ienr) likelihood_ceil = likelihood_ceil * math.pow(ni, ni)/math.factorial(ni) print 'Ideal maximum likelihood =', likelihood_ceil # Possible ideal likelihood (independent for model) nda_likelihood_bestpossible = [] nda_likelihood_best_directprod = np.ones(nmaxevt_ebin) for ienr in range(1, HTG_OBS_ENR.GetNbinsX()+1): print 'Energy range (observed): 10^{0} - 10^{1}'.format(HTG_OBS_ENR.GetXaxis().GetBinLowEdge(ienr), HTG_OBS_ENR.GetXaxis().GetBinUpEdge(ienr)) nda_likelihood_bestpossible.append(np.ones(nmaxevt_ebin)) for mevt in range(nmaxevt_ebin): nda_likelihood_bestpossible[-1][mevt] = nda_likelihood_bestpossible[-1][mevt] * math.exp(-mevt)*math.pow(mevt, mevt)/math.factorial(mevt) # Make a direct product array nda_likelihood_bestpossible_t = nda_likelihood_bestpossible[-1] # Transposing matrix if ienr>1: for jenr in range(ienr-1): nda_likelihood_bestpossible_t = nda_likelihood_bestpossible_t[:, np.newaxis] nda_likelihood_best_directprod = nda_likelihood_best_directprod * nda_likelihood_bestpossible_t # Broadcasting of np array #print nda_likelihood_best_directprod print 'Likelihood of physically ideal cases:' print nda_likelihood_best_directprod # This array's indeces are corresponding to observable count for each energy bin # Loop over dN/dE and PWL-index for (ix, dnde_log) in enumerate(tpl_dnde_log): dnde = 10**dnde_log print '====================' print 'dN/dE = {0:1.2e} at {1:1.1e} MeV'.format(dnde, eref) for (iy, idx_pl) in enumerate(tpl_index): print '--------------------' print 'PWL index = {0}'.format(idx_pl) lst_flux_itgl = ExtrapolateFlux.ExtrapolateFlux(eref, dnde, idx_pl, binw, enr_min, nbin, redshift) htg_flux = ROOT.TH1D('htg_flux', 'Integral flux', nbin, enr_min, enr_min+nbin*binw) for ibin in range(1, htg_flux.GetNbinsX()+1): htg_flux.SetBinContent(ibin, lst_flux_itgl[ibin-1]) htg_flux.SetBinError(ibin, 0) str_fp = 'dNdE{0:0>12d}_PWL{1}{2:0>3d}'.format(int(dnde*1e20+0.5), "n" if idx_pl<0 else "p", int(idx_pl*100+0.5)) suffix_fp = suffix + str_fp dct_htg_model = ModelPointSource.ModelPointSource(name, htg_flux, ra, dec, king, livetime, suffix_fp, nside, addregular) print dct_htg_model hs = ROOT.THStack('spectrum_{0}'.format(str_fp), 'log_{{10}}dN/dE={0:.2f} at {1} MeV, PWL-index={2:+f};log_{{10}}Energy [MeV];[counts]'.format(dnde_log, eref, idx_pl)) hs.Add(HTG_OBS_ENR) for (icla,cla) in enumerate(TPL_CLASS): print cla htg_model = dct_htg_model[cla] htg_model.Rebin(NREBIN) htg_model.SetLineWidth(2) htg_model.SetLineColor(ROOT.kGray) htg_model.SetLineStyle(icla+1) htg_model.SetMarkerColor(ROOT.kGray) hs.Add(htg_model) factor_expected_total = math.exp(-htg_model.Integral()) likelihood_data = factor_expected_total likelihood_data_highcut = poisson.cdf(nobs-1, htg_model.Integral()) likelihood_data_lowcut = poisson.sf(nobs-1, htg_model.Integral()) if likelihood_data_highcut<prob_skip or likelihood_data_lowcut<prob_skip: print 'Detaction probability of', nobs, 'events is smaller than', min(likelihood_data_highcut, likelihood_data_lowcut)*100, '%.' print 'Calculation is skipped...' 
dct_htg_likeresult[cla].SetPoint(dct_htg_likeresult[cla].GetN(), dnde_log, idx_pl, 0.) dct_htg_likeratio[cla].SetPoint(dct_htg_likeratio[cla].GetN(), dnde_log, idx_pl, 0.) dct_htg_likerfrac[cla].SetPoint(dct_htg_likerfrac[cla].GetN(), dnde_log, idx_pl, 1.-prob_skip) dct_htg_unlikerfrac[cla].SetPoint(dct_htg_unlikerfrac[cla].GetN(), dnde_log, idx_pl, 0.+prob_skip) dct_htg_likecoverage[cla].SetPoint(dct_htg_likecoverage[cla].GetN(), dnde_log, idx_pl, 1.-prob_skip) continue nda_likelihood_allpossible = [] nda_likelihood_allpossible_t = [] nda_likelihood_all_directprod = np.ones(nmaxevt_ebin) for ienr in range(1, htg_model.GetNbinsX()+1): print 'Energy range (model): 10^{0} - 10^{1}'.format(htg_model.GetXaxis().GetBinLowEdge(ienr), htg_model.GetXaxis().GetBinUpEdge(ienr)) print 'Energy range (observed): 10^{0} - 10^{1}'.format(HTG_OBS_ENR.GetXaxis().GetBinLowEdge(ienr), HTG_OBS_ENR.GetXaxis().GetBinUpEdge(ienr)) mi = htg_model.GetBinContent(ienr) ni = HTG_OBS_ENR.GetBinContent(ienr) likelihood_data = likelihood_data * math.pow(mi, ni)/math.factorial(ni) # For likelihood RATIO ordering nda_likelihood_allpossible.append(np.ones(nmaxevt_ebin)) for mevt in range(nmaxevt_ebin): nda_likelihood_allpossible[-1][mevt] = nda_likelihood_allpossible[-1][mevt] * math.pow(mi, mevt)/math.factorial(mevt) # Make a direct product array nda_likelihood_allpossible_t = nda_likelihood_allpossible[-1] # Transposing matrix if ienr>1: for jenr in range(ienr-1): nda_likelihood_allpossible_t = nda_likelihood_allpossible_t[:, np.newaxis] nda_likelihood_all_directprod = nda_likelihood_all_directprod * nda_likelihood_allpossible_t # Broadcasting of np array #print nda_likelihood_all_directprod print 'Likelihood of model and data =', likelihood_data nda_likelihood_all_directprod = nda_likelihood_all_directprod * factor_expected_total print 'Possible likelihood values:' print nda_likelihood_all_directprod # Array indeces are corresponding to observable count for each energy bin nda_likelihood_ratio_directprod = nda_likelihood_all_directprod / nda_likelihood_best_directprod print 'Possible likelihood ratio:' print nda_likelihood_ratio_directprod likelihood_ratio_data = likelihood_data / likelihood_ceil fprob_liker = 0. fprob_unliker = 0. for itpl, rvalue in enumerate(nda_likelihood_ratio_directprod.flat): if rvalue > likelihood_ratio_data: fprob_liker+=nda_likelihood_all_directprod.flat[itpl] else: fprob_unliker+=nda_likelihood_all_directprod.flat[itpl] print 'Data is', fprob_liker*100., '% likest case and excluded from acceptance interval by', fprob_unliker*100., '%.' fprob_coverage = fprob_liker + fprob_unliker print 'Calculation covers', fprob_coverage*100., '% of total possibility.' 
dct_htg_likeresult[cla].SetPoint(dct_htg_likeresult[cla].GetN(), dnde_log, idx_pl, likelihood_data) dct_htg_likeratio[cla].SetPoint(dct_htg_likeratio[cla].GetN(), dnde_log, idx_pl, likelihood_data/likelihood_ceil) dct_htg_likerfrac[cla].SetPoint(dct_htg_likerfrac[cla].GetN(), dnde_log, idx_pl, fprob_liker) dct_htg_unlikerfrac[cla].SetPoint(dct_htg_unlikerfrac[cla].GetN(), dnde_log, idx_pl, fprob_unliker) dct_htg_likecoverage[cla].SetPoint(dct_htg_likecoverage[cla].GetN(), dnde_log, idx_pl, fprob_liker+fprob_unliker) if likelihood_data>likelihood_max[cla]: likelihood_max[cla] = likelihood_data xlocmax[cla] = dnde_log ylocmax[cla] = idx_pl FILE_OUT.cd() hs.Write() del dct_htg_model del htg_flux FILE_OUT.cd() #likelihood_max = 0.0 #likelihood_temp = 0.0 #xlocmax = ROOT.Long() #ylocmax = ROOT.Long() #zlocmax = ROOT.Long() for cla in TPL_CLASS: dct_htg_likeresult[cla].Write() dct_htg_likeratio[cla].Write() dct_htg_likerfrac[cla].Write() dct_htg_unlikerfrac[cla].Write() dct_htg_likecoverage[cla].Write() dct_cvs_likeresult[cla].cd() dct_cvs_likeresult[cla].SetLogz() dct_htg_likeresult[cla].Draw("colz") #likelihood_max = dct_htg_likeresult[cla].GetMaximum() #dct_htg_likeresult[cla].GetMaximumBin(xlocmax, ylocmax, zlocmax) print '===== Maximum likelihood =====' print 'dNdE =', xlocmax[cla], 'at', eref, 'MeV' print 'PWL-index =', ylocmax[cla] dct_htg_likeresult[cla].GetZaxis().SetRangeUser(0.001*likelihood_max[cla], likelihood_max[cla]) dct_cvs_likeresult[cla].Write() dct_cvs_likeratio[cla].cd() dct_cvs_likeratio[cla].SetLogz() dct_htg_likeratio[cla].Draw("colz") dct_htg_likeratio[cla].GetZaxis().SetRangeUser(0.05, 0.68) dct_cvs_likeratio[cla].Write() return dct_htg_likeresult
def _cdf(self, x, mu): k = floor(x) if k == 0: return 0.0 else: return (poisson.cdf(k, mu) - poisson.pmf(0, mu)) / poisson.sf(0, mu)
def _ppf(self, q, mu): return poisson.ppf(poisson.sf(0, mu) * q + poisson.pmf(0, mu), mu)
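# The two methods above implement a zero-truncated Poisson. A standalone sketch of the same
# construction (helper names are illustrative), checking that the truncated pmf renormalizes
# and that the ppf inverts the cdf in the usual quantile sense.
import numpy as np
from scipy.stats import poisson

def zt_poisson_cdf(k, mu):
    """P(X <= k | X > 0) for X ~ Poisson(mu)."""
    k = np.floor(k)
    return np.where(k < 1, 0.0, (poisson.cdf(k, mu) - poisson.pmf(0, mu)) / poisson.sf(0, mu))

def zt_poisson_ppf(q, mu):
    """Quantile function, mirroring `_ppf` above: map q back to the untruncated scale."""
    return poisson.ppf(poisson.sf(0, mu) * q + poisson.pmf(0, mu), mu)

mu = 2.5
ks = np.arange(1, 40)
assert np.isclose((poisson.pmf(ks, mu) / poisson.sf(0, mu)).sum(), 1.0)  # truncated pmf renormalizes
k_med = zt_poisson_ppf(0.5, mu)
assert zt_poisson_cdf(k_med, mu) >= 0.5 > zt_poisson_cdf(k_med - 1, mu)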
def test_logpmf(self): poisson_logpmf = poisson.logpmf(4, 6) - np.log(poisson.sf(2, 6)) tpoisson_logpmf = truncatedpoisson.logpmf(4, 6, 2) assert_allclose(poisson_logpmf, tpoisson_logpmf, rtol=1e-7)
from collections import OrderedDict

import numpy as np
from scipy.stats import poisson


def polistat(poli, background):
    # --- one second integration time ---
    stat_1sec = OrderedDict()
    stat_1sec['Actual Distances'] = poli.actualdist
    stat_1sec['Actual Probability'] = poli.actualprob
    stat_1sec['Actual Sigma +'] = poli.actualsig_pos
    stat_1sec['Actual Sigma -'] = poli.actualsig_neg
    stat_1sec['Name'] = poli.name
    stat_1sec['Title'] = '1 Second Integration time'
    stat_1sec['Alarm Setting'] = poli.alarm
    stat_1sec['Channel 1 Bin'] = poli.ch1_cut
    stat_1sec['Channel 2 Bin'] = poli.ch2_cut
    stat_1sec['Channel 3 Bin'] = poli.ch3_cut
    stat_1sec['Channel 4 Bin'] = poli.ch4_cut
    # source rates
    stat_1sec['Channel 1 Source Hits Per Sec'] = poli.ch1_rate
    stat_1sec['Channel 2 Source Hits Per Sec'] = poli.ch2_rate
    stat_1sec['Channel 3 Source Hits Per Sec'] = poli.ch3_rate
    stat_1sec['Channel 4 Source Hits Per Sec'] = poli.ch4_rate
    # Poisson (counting) uncertainty on the source rates
    stat_1sec['Channel 1 Source Sigma Hit Rate'] = np.sqrt(poli.ch1_rate * poli.itime1) / poli.s_time
    stat_1sec['Channel 2 Source Sigma Hit Rate'] = np.sqrt(poli.ch2_rate * poli.itime1) / poli.s_time
    stat_1sec['Channel 3 Source Sigma Hit Rate'] = np.sqrt(poli.ch3_rate * poli.itime1) / poli.s_time
    stat_1sec['Channel 4 Source Sigma Hit Rate'] = np.sqrt(poli.ch4_rate * poli.itime1) / poli.s_time
    stat_1sec['Channel 1 Background Hit Rate'] = background.rate[0]
    stat_1sec['Channel 2 Background Hit Rate'] = background.rate[1]
    stat_1sec['Channel 3 Background Hit Rate'] = background.rate[2]
    stat_1sec['Channel 4 Background Hit Rate'] = background.rate[3]
    stat_1sec['Channel 1 Sigma Background Hit Rate'] = background.sig_ch1
    stat_1sec['Channel 2 Sigma Background Hit Rate'] = background.sig_ch2
    stat_1sec['Channel 3 Sigma Background Hit Rate'] = background.sig_ch3
    stat_1sec['Channel 4 Sigma Background Hit Rate'] = background.sig_ch4
    stat_1sec['Regular Background Sigma'] = background.real_sig
    # source + background rates and their combined (quadrature) uncertainty
    stat_1sec['Combined Hit Rate Channel 1'] = poli.ch1_rate + background.rate[0]
    stat_1sec['Combined Hit Rate Channel 2'] = poli.ch2_rate + background.rate[1]
    stat_1sec['Combined Hit Rate Channel 3'] = poli.ch3_rate + background.rate[2]
    stat_1sec['Combined Hit Rate Channel 4'] = poli.ch4_rate + background.rate[3]
    stat_1sec['Sigma Combined Hit Rate Channel 1'] = np.sqrt(stat_1sec['Channel 1 Source Sigma Hit Rate'] ** 2 + background.sig_ch1 ** 2)
    stat_1sec['Sigma Combined Hit Rate Channel 2'] = np.sqrt(stat_1sec['Channel 2 Source Sigma Hit Rate'] ** 2 + background.sig_ch2 ** 2)
    stat_1sec['Sigma Combined Hit Rate Channel 3'] = np.sqrt(stat_1sec['Channel 3 Source Sigma Hit Rate'] ** 2 + background.sig_ch3 ** 2)
    stat_1sec['Sigma Combined Hit Rate Channel 4'] = np.sqrt(stat_1sec['Channel 4 Source Sigma Hit Rate'] ** 2 + background.sig_ch4 ** 2)
    # source rate expressed in units of the background fluctuation
    stat_1sec['Channel 1 Sigma Above Background'] = stat_1sec['Channel 1 Source Hits Per Sec'] / background.sig_ch1
    stat_1sec['Channel 2 Sigma Above Background'] = stat_1sec['Channel 2 Source Hits Per Sec'] / background.sig_ch2
    stat_1sec['Channel 3 Sigma Above Background'] = stat_1sec['Channel 3 Source Hits Per Sec'] / background.sig_ch3
    stat_1sec['Channel 4 Sigma Above Background'] = stat_1sec['Channel 4 Source Hits Per Sec'] / background.sig_ch4
    # alarm threshold: 'Alarm Setting' sigmas above the background fluctuation
    stat_1sec['Channel 1 Source Rate Required to alarm'] = stat_1sec['Channel 1 Sigma Background Hit Rate'] * stat_1sec['Alarm Setting']
    stat_1sec['Channel 2 Source Rate Required to alarm'] = stat_1sec['Channel 2 Sigma Background Hit Rate'] * stat_1sec['Alarm Setting']
    stat_1sec['Channel 3 Source Rate Required to alarm'] = stat_1sec['Channel 3 Sigma Background Hit Rate'] * stat_1sec['Alarm Setting']
    stat_1sec['Channel 4 Source Rate Required to alarm'] = stat_1sec['Channel 4 Sigma Background Hit Rate'] * stat_1sec['Alarm Setting']

    ch1_mean = stat_1sec['Combined Hit Rate Channel 1']
    ch2_mean = stat_1sec['Combined Hit Rate Channel 2']
    ch3_mean = stat_1sec['Combined Hit Rate Channel 3']
    ch4_mean = stat_1sec['Combined Hit Rate Channel 4']
    ch1_sigma = stat_1sec['Sigma Combined Hit Rate Channel 1']
    ch2_sigma = stat_1sec['Sigma Combined Hit Rate Channel 2']
    ch3_sigma = stat_1sec['Sigma Combined Hit Rate Channel 3']
    ch4_sigma = stat_1sec['Sigma Combined Hit Rate Channel 4']
    # count level needed to set off the alarm
    ch1_alarm = stat_1sec['Channel 1 Source Rate Required to alarm']
    ch2_alarm = stat_1sec['Channel 2 Source Rate Required to alarm']
    ch3_alarm = stat_1sec['Channel 3 Source Rate Required to alarm']
    ch4_alarm = stat_1sec['Channel 4 Source Rate Required to alarm']
    # probability that a Poisson count with the combined mean exceeds the alarm level
    stat_1sec['Channel 1 Probability to Alarm'] = poisson.sf(ch1_alarm, ch1_mean)
    stat_1sec['Channel 2 Probability to Alarm'] = poisson.sf(ch2_alarm, ch2_mean)
    stat_1sec['Channel 3 Probability to Alarm'] = poisson.sf(ch3_alarm, ch3_mean)
    stat_1sec['Channel 4 Probability to Alarm'] = poisson.sf(ch4_alarm, ch4_mean)

    # --- two second integration time ---
    stat_2sec = OrderedDict()
    stat_2sec['Name'] = poli.name
    stat_2sec['Title'] = '2 Second Integration time'
    stat_2sec['Alarm Setting'] = poli.alarm
    stat_2sec['Channel 1 Bin'] = poli.ch1_cut
    stat_2sec['Channel 2 Bin'] = poli.ch2_cut
    stat_2sec['Channel 3 Bin'] = poli.ch3_cut
    stat_2sec['Channel 4 Bin'] = poli.ch4_cut
    # source rates
    stat_2sec['Channel 1 Source Hits Per Sec'] = poli.ch1_rate
    stat_2sec['Channel 2 Source Hits Per Sec'] = poli.ch2_rate
    stat_2sec['Channel 3 Source Hits Per Sec'] = poli.ch3_rate
    stat_2sec['Channel 4 Source Hits Per Sec'] = poli.ch4_rate
    stat_2sec['Channel 1 Source Sigma Hit Rate'] = np.sqrt(poli.ch1_rate * poli.itime2) / poli.itime2
    stat_2sec['Channel 2 Source Sigma Hit Rate'] = np.sqrt(poli.ch2_rate * poli.itime2) / poli.itime2
    stat_2sec['Channel 3 Source Sigma Hit Rate'] = np.sqrt(poli.ch3_rate * poli.itime2) / poli.itime2
    stat_2sec['Channel 4 Source Sigma Hit Rate'] = np.sqrt(poli.ch4_rate * poli.itime2) / poli.itime2
    # all four source-rate sigmas collected in one list
    stat_2sec['ch1 sigma stat all rate'] = [
        stat_2sec['Channel 1 Source Sigma Hit Rate'],
        stat_2sec['Channel 2 Source Sigma Hit Rate'],
        stat_2sec['Channel 3 Source Sigma Hit Rate'],
        stat_2sec['Channel 4 Source Sigma Hit Rate'],
    ]
    stat_2sec['Channel 1 Background Hit Rate'] = background.rate[0]
    stat_2sec['Channel 2 Background Hit Rate'] = background.rate[1]
    stat_2sec['Channel 3 Background Hit Rate'] = background.rate[2]
    stat_2sec['Channel 4 Background Hit Rate'] = background.rate[3]
    stat_2sec['Channel 1 Sigma Background Hit Rate'] = background.sig_ch1
    stat_2sec['Channel 2 Sigma Background Hit Rate'] = background.sig_ch2
    stat_2sec['Channel 3 Sigma Background Hit Rate'] = background.sig_ch3
    stat_2sec['Channel 4 Sigma Background Hit Rate'] = background.sig_ch4
    stat_2sec['Combined Hit Rate Channel 1'] = poli.ch1_rate + background.rate[0]
    stat_2sec['Combined Hit Rate Channel 2'] = poli.ch2_rate + background.rate[1]
    stat_2sec['Combined Hit Rate Channel 3'] = poli.ch3_rate + background.rate[2]
    stat_2sec['Combined Hit Rate Channel 4'] = poli.ch4_rate + background.rate[3]
    stat_2sec['Sigma Combined Hit Rate Channel 1'] = np.sqrt(stat_2sec['Channel 1 Source Sigma Hit Rate'] ** 2 + background.sig_ch1 ** 2)
    stat_2sec['Sigma Combined Hit Rate Channel 2'] = np.sqrt(stat_2sec['Channel 2 Source Sigma Hit Rate'] ** 2 + background.sig_ch2 ** 2)
    stat_2sec['Sigma Combined Hit Rate Channel 3'] = np.sqrt(stat_2sec['Channel 3 Source Sigma Hit Rate'] ** 2 + background.sig_ch3 ** 2)
    stat_2sec['Sigma Combined Hit Rate Channel 4'] = np.sqrt(stat_2sec['Channel 4 Source Sigma Hit Rate'] ** 2 + background.sig_ch4 ** 2)
    stat_2sec['Channel 1 Sigma Above Background'] = stat_2sec['Channel 1 Source Hits Per Sec'] / background.sig_ch1
    stat_2sec['Channel 2 Sigma Above Background'] = stat_2sec['Channel 2 Source Hits Per Sec'] / background.sig_ch2
    stat_2sec['Channel 3 Sigma Above Background'] = stat_2sec['Channel 3 Source Hits Per Sec'] / background.sig_ch3
    stat_2sec['Channel 4 Sigma Above Background'] = stat_2sec['Channel 4 Source Hits Per Sec'] / background.sig_ch4
    stat_2sec['Channel 1 Source Rate Required to alarm'] = stat_2sec['Channel 1 Sigma Background Hit Rate'] * stat_2sec['Alarm Setting']
    stat_2sec['Channel 2 Source Rate Required to alarm'] = stat_2sec['Channel 2 Sigma Background Hit Rate'] * stat_2sec['Alarm Setting']
    stat_2sec['Channel 3 Source Rate Required to alarm'] = stat_2sec['Channel 3 Sigma Background Hit Rate'] * stat_2sec['Alarm Setting']
    stat_2sec['Channel 4 Source Rate Required to alarm'] = stat_2sec['Channel 4 Sigma Background Hit Rate'] * stat_2sec['Alarm Setting']

    ch1_mean = stat_2sec['Combined Hit Rate Channel 1']
    ch2_mean = stat_2sec['Combined Hit Rate Channel 2']
    ch3_mean = stat_2sec['Combined Hit Rate Channel 3']
    ch4_mean = stat_2sec['Combined Hit Rate Channel 4']
    ch1_sigma = stat_2sec['Sigma Combined Hit Rate Channel 1']
    ch2_sigma = stat_2sec['Sigma Combined Hit Rate Channel 2']
    ch3_sigma = stat_2sec['Sigma Combined Hit Rate Channel 3']
    ch4_sigma = stat_2sec['Sigma Combined Hit Rate Channel 4']
    # count level needed to set off the alarm
    ch1_alarm = stat_2sec['Channel 1 Source Rate Required to alarm']
    ch2_alarm = stat_2sec['Channel 2 Source Rate Required to alarm']
    ch3_alarm = stat_2sec['Channel 3 Source Rate Required to alarm']
    ch4_alarm = stat_2sec['Channel 4 Source Rate Required to alarm']
    stat_2sec['Channel 1 Probability to Alarm'] = poisson.sf(ch1_alarm, ch1_mean)
    stat_2sec['Channel 2 Probability to Alarm'] = poisson.sf(ch2_alarm, ch2_mean)
    stat_2sec['Channel 3 Probability to Alarm'] = poisson.sf(ch3_alarm, ch3_mean)
    stat_2sec['Channel 4 Probability to Alarm'] = poisson.sf(ch4_alarm, ch4_mean)

    return stat_1sec, stat_2sec
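The alarm-probability step above boils down to a single survival-function call per channel. A minimal, self-contained sketch of that calculation with invented numbers (`source_rate`, `bkg_rate`, `n_sigma` are illustrative values, not taken from the function above):

import numpy as np
from scipy.stats import poisson

# Hypothetical detector channel
source_rate = 12.0                 # source hits per second
bkg_rate = 30.0                    # background hits per second
bkg_sigma = np.sqrt(bkg_rate)      # Poisson fluctuation of the background
n_sigma = 5.0                      # alarm setting, in units of background sigma

alarm_level = n_sigma * bkg_sigma          # counts needed to trigger the alarm
combined_mean = source_rate + bkg_rate     # expected counts with the source present

# P(X > alarm_level) for X ~ Poisson(combined_mean)
prob_to_alarm = poisson.sf(alarm_level, combined_mean)
print(prob_to_alarm)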
def poisson_pore_fit(min_pore_size):
    # Sum of squared residuals between the observed fmax means and the Poisson
    # probability of forming at least `min_pore_size` pores at each Bax ratio.
    # Note poisson.sf(k - 1, mu) = P(N >= k); `fmax_means` and `bax_ratios`
    # come from the enclosing scope.
    return np.sum((fmax_means[1:] - poisson.sf(min_pore_size - 1, bax_ratios[1:])) ** 2)
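To exercise the same objective in isolation, one could pass the data in explicitly and scan candidate pore sizes; the arrays below are invented placeholders, not values from the original script:

import numpy as np
from scipy.stats import poisson

# Hypothetical data standing in for the module-level arrays used above
bax_ratios = np.array([0.0, 0.5, 1.0, 2.0, 4.0, 8.0])
fmax_means = np.array([0.0, 0.30, 0.55, 0.80, 0.95, 0.99])

def pore_fit_error(min_pore_size, fmax_means, bax_ratios):
    # P(N >= min_pore_size) for N ~ Poisson(bax_ratio), compared to observed fmax
    model = poisson.sf(min_pore_size - 1, bax_ratios[1:])
    return np.sum((fmax_means[1:] - model) ** 2)

# Pick the integer pore size with the smallest squared error
errors = {n: pore_fit_error(n, fmax_means, bax_ratios) for n in range(1, 6)}
best_n = min(errors, key=errors.get)
print(best_n, errors[best_n])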
    })
df = df.sort_values(
    ["chrom", "start", "end", "TTAA_chrom", "TTAA_start", "TTAA_end"])
groups = df.groupby(["chrom", "start", "end"])
first = groups.nth(0)["TTAA_start"]
last = groups.nth(-1)["TTAA_end"]
joined = pd.concat([first, last], axis=1).reset_index()
refined = joined[["chrom", "TTAA_start", "TTAA_end"]]
blocks = BedTool.from_dataframe(refined)
data = blocks.intersect(experiment, c=True).intersect(background, c=True)
df = data.to_dataframe()
df = df.rename(index=str, columns={"name": "expHops", "score": "bgHops"})
df["norm_bgHops"] = df["bgHops"] * scaleFactor + args.pseudocount
df["-log10pValue"] = -np.log10(
    poisson.sf(df["expHops"] - 1, df["norm_bgHops"]))
outdf = df[df["-log10pValue"] >= args.pValueCutoff]
outbed = BedTool.from_dataframe(outdf)
if args.distance:
    outbed = outbed.merge(d=args.distance)
if args.minSize:
    minSize = args.minSize
else:
    minSize = 0
if args.maxSize:
    maxSize = args.maxSize
else:
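The significance step in that snippet is an upper-tail Poisson test per interval: with `expHops` observed insertions and `norm_bgHops` expected from the scaled background, `poisson.sf(expHops - 1, norm_bgHops)` is P(X >= expHops). A small standalone illustration on a toy table (column names follow the snippet; the numbers are invented):

import numpy as np
import pandas as pd
from scipy.stats import poisson

df = pd.DataFrame({"expHops": [3, 15, 40], "norm_bgHops": [2.5, 3.0, 40.0]})

# P(X >= expHops) for X ~ Poisson(norm_bgHops), reported as a -log10 p-value
df["-log10pValue"] = -np.log10(poisson.sf(df["expHops"] - 1, df["norm_bgHops"]))
print(df)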
def test_pmf_zero(self):
    # Zero-truncated Poisson: pmf(k) equals the Poisson pmf(k) divided by P(X > 0)
    poisson_pmf = poisson.pmf(2, 2) / poisson.sf(0, 2)
    tpoisson_pmf = truncatedpoisson.pmf(2, 2, 0)
    assert_allclose(poisson_pmf, tpoisson_pmf, rtol=1e-7)
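The identity that test relies on can be checked with scipy alone: truncating a Poisson at zero rescales the pmf over k >= 1 by 1 / P(X > 0), so the truncated probabilities still sum to one. A quick sketch:

import numpy as np
from scipy.stats import poisson

mu = 2.0
k = np.arange(1, 50)

# Zero-truncated pmf: Poisson pmf renormalised by P(X > 0) = poisson.sf(0, mu)
trunc_pmf = poisson.pmf(k, mu) / poisson.sf(0, mu)

print(trunc_pmf[1])     # the k = 2 term, matching poisson.pmf(2, 2) / poisson.sf(0, 2)
print(trunc_pmf.sum())  # ~1.0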
def paramPaTau(pa, tau, Rmax=100, show=False, showProb=False):
    ph = 1 - pa

    # C1: bisection for the count at which poisson.cdf(k, ph*tau) crosses epsilon
    a = 0
    b = int(ph * tau)
    k = (a + b) // 2
    while b - a > 1:
        if poisson.cdf(k, ph * tau) < epsilon:
            a = k
            k = (a + b) // 2
        else:
            b = k
            k = (a + b) // 2
    mumax = a / tau

    # C3
    a = int(2 * tau)
    b = int(tau)
    k = (a + b) // 2
    while a - b > 1:
        if poisson.sf(k, tau) < epsilon:
            a = k
            k = (a + b) // 2
        else:
            b = k
            k = (a + b) // 2
    mumin = a / (2 * tau)

    mu = mumax

    # C2
    a = int(2 * pa * tau)
    b = int(pa * tau)
    k = int((a + b) // 2)
    while a - b > 1:
        if poisson.sf(k, pa * tau) < epsilon:
            a = k
            k = (a + b) // 2
        else:
            b = k
            k = (a + b) // 2
    if mu > 0:
        alpha = a / (mu * tau)
    else:
        alpha = np.nan

    # Upper bound on Xh1t
    pmin = (1 - alpha) / 2
    a = 0
    b = pmin * ph * tau
    k = (a + b) // 2
    while b - a > 1:
        if poisson.cdf(k, pmin * ph * tau) < epsilon:
            a = k
            k = (a + b) // 2
        else:
            b = k
            k = (a + b) // 2
    xh1t = a

    # C4
    n = (1 - alpha) * mu * tau
    M = xh1t
    N = 2 * mu * tau
    a = 0
    b = M
    k = (a + b) // 2
    while (b - a) > 1:  # tau / 10**3
        # print(b - a)
        if hypergeom.cdf(k, N, M, n) * 12000 < epsilon:
            a = k
            k = (a + b) // 2
        else:
            b = k
            k = (a + b) // 2
    if mu > 0:
        lbd = a / (mu * tau)
    else:
        lbd = np.nan

    # rmax
    if lbd == 0 or np.isnan(lbd):
        rmax = np.nan
    else:
        rmax = 3 * int(alpha / lbd + 1)

    if show:
        print("tau = ", tau)
        print("mumin = ", int(10000 * mumin) / 100, "%")
        print("mumax = ", int(10000 * mumax) / 100, "%")
        print("alpha = ", int(10000 * alpha) / 100, "%")
        print("pmin = ", int(10000 * pmin) / 100, "%")
        print("xh1t = ", 100 * xh1t / tau, "%")
        print("lbd = ", int(10000 * lbd) / 100, "%")
        print("rmax = ", rmax)
        print()
        # print("lbd*mu = ", int(10000*lbd*mu)/10000)
    if showProb:
        print("C1: ", poisson.cdf(mumax * tau, ph * tau))
        print("C2: ", poisson.sf(alpha * mu * tau, pa * tau))
        print("C3: ", poisson.sf(2 * mumin * tau, tau))
        print("Xh1t: ", poisson.cdf(xh1t, pmin * ph * tau))
        print("C4: ", 12000 * hypergeom.cdf(lbd * mu * tau, N, M, n))
        print()

    return (tau, mumin, mumax, alpha, lbd, rmax)
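The same bisection pattern recurs several times above: search for the count at which a Poisson tail probability crosses a small threshold. A generic helper capturing that pattern, with names of my own choosing rather than from the original:

from scipy.stats import poisson

def largest_k_with_tail_at_least(mu, epsilon, k_hi):
    """Largest k in [0, k_hi] with poisson.sf(k, mu) >= epsilon, by bisection.

    poisson.sf(k, mu) = P(X > k) is non-increasing in k, so bisection applies,
    assuming sf(0, mu) >= epsilon and sf(k_hi, mu) < epsilon.
    """
    lo, hi = 0, k_hi
    while hi - lo > 1:
        mid = (lo + hi) // 2
        if poisson.sf(mid, mu) >= epsilon:
            lo = mid
        else:
            hi = mid
    return lo

# Example: for mean 100, the count beyond which the upper tail drops below 1e-6
print(largest_k_with_tail_at_least(100.0, 1e-6, 1000))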
def func(ll):
    # Poisson pmf over bins 0..nn-1 plus one overflow bin holding the tail
    # probability P(X > nn-1), so the returned vector sums to 1.
    # `nn` is taken from the enclosing scope.
    y = range(0, nn)
    z = y[-1]
    y = np.append(poisson.pmf(y, ll), [poisson.sf(z, ll)])
    return y
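A quick check that the binned vector built this way is a proper probability distribution, with the free variable `nn` pinned to a concrete value here:

import numpy as np
from scipy.stats import poisson

nn, ll = 10, 3.5   # example values for the free variables used by func
probs = np.append(poisson.pmf(range(0, nn), ll), [poisson.sf(nn - 1, ll)])
print(len(probs))   # nn + 1 entries: pmf for 0..nn-1 plus the overflow tail P(X > nn-1)
print(probs.sum())  # ~1.0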
from scipy.stats import poisson

# Car accidents per day modelled as Poisson with mean mu = 2

# Probability of exactly 3 accidents in a day
poisson.pmf(3, 2)

# Probability of 3 or fewer accidents in a day
poisson.cdf(3, 2)

# Probability of more than 3 accidents in a day
poisson.sf(3, 2)
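The three quantities above are related: the survival function is the complement of the CDF, and the pmf is its increment. A two-line check:

from scipy.stats import poisson

# sf is the complement of cdf ...
assert abs(poisson.sf(3, 2) - (1 - poisson.cdf(3, 2))) < 1e-12
# ... and the pmf is the increment of the cdf
assert abs(poisson.pmf(3, 2) - (poisson.cdf(3, 2) - poisson.cdf(2, 2))) < 1e-12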
import numpy as np
from scipy.stats import poisson

p = 0.95
alpha = 0.99
y = 20
num_trial = 1
success = 0.
count = 0
for i in range(num_trial):
    print(i + 1, end='\r')
    tmp_y = 20
    k = 0
    # random walk with Poisson(alpha) - 1 increments, run until it hits 0 or 200
    while tmp_y > 0 and tmp_y < 200:
        _k = np.random.poisson(alpha) - 1
        k += _k
        tmp_y += _k
        if tmp_y >= 200:
            # reweight trajectories that reach 200 (importance-sampling weight)
            success += np.exp(alpha - p) * np.power(p / alpha, k)
            count += 1
prob = success / num_trial
print(prob)
print(count)
# Poisson tail P(X > 18) for mean p, printed for comparison
prob = poisson.sf(18, p)
print(prob)