def poisson_test_two_sided_matrix(x, lm):
    """Element-wise two-sided Poisson tail sum, built with ``np.select``.

    For each element the result is ``pl + pu`` where

    * ``x < lm``:  ``pl = cdf(x, lm)`` and ``pu = sf(isf(pl, lm), lm)``
    * ``x > lm``:  ``pl = pu = sf(x, lm)``
    * ``x == lm``: ``pl = 1`` and ``pu = 0`` (the ``np.select`` default)

    :param x: observed count(s)
    :param lm: Poisson mean(s), broadcastable against ``x``
    :return: ``pl + pu`` for every element
    """
    is_below = x < lm
    is_above = x > lm
    is_equal = x == lm

    # sf(x, lm) is needed by both selections, so evaluate it once.
    upper_tail = poisson.sf(x, lm)

    pl = np.select(
        [is_below, is_above, is_equal],
        [poisson.cdf(x, lm), upper_tail, 1],
    )

    # Upper tail whose survival probability does not exceed pl.
    matched_tail = poisson.sf(poisson.isf(pl, lm), lm)
    pu = np.select([is_below, is_above], [matched_tail, upper_tail])

    return pl + pu
def inv_z_score_poisson(self, lamb: np.ndarray, z: np.ndarray) -> np.ndarray:
    """
    Invert the Poisson z-score ``z = np.sqrt(2.) * erfcinv(2. * poisson.sf(x, lamb))``.

    : lamb : expected background number count from outer aperture (lambda)
    : z : z-score of poisson map

    : return : number count of observed stars in the inner aperture
    """
    # Undo the erfcinv step: recover the survival probability encoded by z.
    survival_prob = 0.5 * erfc(z / np.sqrt(2.))
    # Undo the sf step: map that probability back to a count.
    return poisson.isf(survival_prob, lamb)
def qq_comp_binom_poiss_comp_determn_poiss(l, p, poisson_mu=4.0, n_sim=1000,
                                           alpha_hats=np.arange(0.0001, 1.0, 1e-3)):
    """Estimate how often simulated draws exceed scaled Poisson quantiles.

    For every nominal level in ``alpha_hats`` the threshold is
    ``int(l * p) * poisson.isf(alpha_hat, poisson_mu)``. Each of the ``n_sim``
    draws from ``CompoundPoisson.rvs_s(poisson_mu, l, p, compound='binom')``
    is compared against all thresholds, and the exceedance indicators are
    averaged over the simulations.

    NOTE(review): ``CompoundPoisson`` is defined outside this block; the
    return type of ``rvs_s`` is presumably a scalar or an array that
    broadcasts against ``alpha_hats`` -- confirm.

    :param l: passed to ``CompoundPoisson.rvs_s``; also used in ``int(l * p)``
    :param p: passed to ``CompoundPoisson.rvs_s``; also used in ``int(l * p)``
    :param poisson_mu: Poisson mean used for both the quantiles and the draws
    :param n_sim: number of simulated draws
    :param alpha_hats: nominal upper-tail probabilities
    :return: ``(alphas, alpha_hats)`` -- empirical exceedance fractions and
        the nominal levels they correspond to
    """
    thresholds = int(l * p) * poisson.isf(alpha_hats, poisson_mu)
    exceed_fraction = np.zeros(len(alpha_hats))

    for _ in range(n_sim):
        draw = CompoundPoisson.rvs_s(poisson_mu, l, p, compound='binom')
        # Each simulation contributes 1 / n_sim wherever it exceeds a threshold.
        exceed_fraction += (draw > thresholds) / n_sim

    return exceed_fraction, alpha_hats
def poisson_test_two_sided(x, lm):
    """Two-sided Poisson tail sum computed with boolean-mask arithmetic.

    The result is ``pl + pu`` where

    * ``x < lm``:  ``pl = cdf(x, lm)`` and ``pu = sf(isf(pl, lm), lm)``
    * ``x > lm``:  ``pl = pu = sf(x, lm)``
    * ``x == lm``: ``pl = 1`` and ``pu = 0``

    :param x: observed count(s)
    :param lm: Poisson mean(s)
    :return: ``pl + pu``
    """
    below = x < lm
    above = x > lm
    at_mean = x == lm

    lower_tail = poisson.cdf(x, lm)
    upper_tail = poisson.sf(x, lm)

    # Multiplying by the masks keeps exactly one term per element.
    pl = lower_tail * below + upper_tail * above + at_mean

    # Upper tail whose survival probability does not exceed pl.
    matched_tail = poisson.sf(poisson.isf(pl, lm), lm)
    pu = matched_tail * below + upper_tail * above

    return pl + pu
def main():
    """Print terms whose largest per-document frequency is below a Poisson bound.

    Loads the pickled document collection, and for every column of its
    frequency matrix (one column per entry of ``docs_data.terms``) fits a
    Poisson rate ``collection frequency / number of documents``. The bound is
    ``poisson.isf(1 - 0.99 ** (1 / num_docs), rate)``, i.e. the smallest count
    ``x`` with ``cdf(x) ** num_docs >= 0.99``. A term is printed when the
    largest frequency observed in its column is strictly below that bound.

    NOTE(review): rows of ``freq_matrix`` are presumably documents -- confirm.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at a trusted data file.
    with open('../../../../data/pickle/lyrl.db', 'rb') as docs_sr:
        # noinspection PyArgumentList
        docs_data = pickle.load(docs_sr)

    term_columns = docs_data.freq_matrix.tocsc()
    collection_totals = term_columns.sum(axis=0).A1

    for term_idx, collection_total in enumerate(collection_totals):
        observed_freqs = Counter(term_columns[:, term_idx].todense().A1)

        num_docs = len(docs_data.docs)
        rate = collection_total / num_docs

        # Per-document tail probability such that, over num_docs documents,
        # the chance that every frequency stays <= bound is at least 0.99.
        per_doc_tail = 1 - (1 - 0.01) ** (1 / num_docs)
        poisson_bound = poisson.isf(per_doc_tail, rate)

        if max(observed_freqs) < poisson_bound:
            print(docs_data.terms[term_idx])
def poisson_cutoff(scores, window, tail_prob=None):
    """Return a Poisson-based score cutoff and the mean of the modal score bin.

    The scores are histogrammed into bins of width ``window`` starting at 0.
    The first bin holding the highest count is the modal bin; the scores that
    ``numpy.histogram`` counted in that bin are averaged, and the average is
    used as the Poisson mean whose inverse survival function gives the cutoff.

    :param scores: non-empty sequence of integer scores (the bin edges are
        built with ``range``, so ``max(scores)`` and ``window`` must be ints)
    :param window: bin width
    :param tail_prob: upper-tail probability handed to ``poisson.isf``. When
        omitted, the name ``q`` is looked up in module scope, as this
        function always did.
        NOTE(review): ``q`` is not defined in this block -- confirm it exists
        at module level for callers that do not pass ``tail_prob``.
    :return: ``(cutoff, mean)``
    :raises ValueError: if ``scores`` is empty or no bin can be formed
        (``max(scores) <= 0``)
    """
    import numpy
    from scipy.stats import poisson

    hist, bin_edges = numpy.histogram(
        scores, range(0, max(scores) + window, window))
    if hist.size == 0:
        raise ValueError(
            "cannot bin scores: max(scores) must be greater than 0")

    # argmax returns the first maximal bin, like the original first-match scan.
    modal = int(numpy.argmax(hist))
    start, end = bin_edges[modal], bin_edges[modal + 1]

    # numpy.histogram closes the LAST bin on the right ([start, end]); every
    # other bin is half-open. Mirror that here so the mean is taken over
    # exactly the scores that were counted in the modal bin. Without this,
    # scores equal to the last edge were dropped (and an all-edge bin raised
    # ZeroDivisionError).
    closed_right = modal == hist.size - 1
    in_bin = [s for s in scores
              if start <= s < end or (closed_right and s == end)]
    mean = sum(in_bin) / float(len(in_bin))

    prob = q if tail_prob is None else tail_prob
    return poisson.isf(prob, mean), mean
def poisson_cutoff(scores, window, tail_prob=None):
    """Return a Poisson-based score cutoff and the mean of the modal score bin.

    The scores are histogrammed into bins of width ``window`` starting at 0.
    The first bin holding the highest count is the modal bin; the scores that
    ``numpy.histogram`` counted in that bin are averaged, and the average is
    used as the Poisson mean whose inverse survival function gives the cutoff.

    :param scores: non-empty sequence of integer scores (the bin edges are
        built with ``range``, so ``max(scores)`` and ``window`` must be ints)
    :param window: bin width
    :param tail_prob: upper-tail probability handed to ``poisson.isf``. When
        omitted, the name ``q`` is looked up in module scope, as this
        function always did.
        NOTE(review): ``q`` is not defined in this block -- confirm it exists
        at module level for callers that do not pass ``tail_prob``.
    :return: ``(cutoff, mean)``
    :raises ValueError: if ``scores`` is empty or no bin can be formed
        (``max(scores) <= 0``)
    """
    import numpy
    from scipy.stats import poisson

    hist, bin_edges = numpy.histogram(
        scores, range(0, max(scores) + window, window))
    if hist.size == 0:
        raise ValueError(
            "cannot bin scores: max(scores) must be greater than 0")

    # argmax returns the first maximal bin, like the original first-match scan.
    modal = int(numpy.argmax(hist))
    start, end = bin_edges[modal], bin_edges[modal + 1]

    # numpy.histogram closes the LAST bin on the right ([start, end]); every
    # other bin is half-open. Mirror that here so the mean is taken over
    # exactly the scores that were counted in the modal bin. Without this,
    # scores equal to the last edge were dropped (and an all-edge bin raised
    # ZeroDivisionError).
    closed_right = modal == hist.size - 1
    in_bin = [s for s in scores
              if start <= s < end or (closed_right and s == end)]
    mean = sum(in_bin) / float(len(in_bin))

    prob = q if tail_prob is None else tail_prob
    return poisson.isf(prob, mean), mean
def display_spectrum(self):
    """
    Plot the current spectrum and its residuals, with every map summed over space.

    For each active plane the observation, background and expected-model maps
    are summed. The plotted points are the background-subtracted counts; the
    residuals compare the observed counts with model + background, in units of
    sqrt(model + background).

    :return: the result of ``self._plot_spectrum`` (documented as a matplotlib.Figure)
    """
    n_point = self._likelihood_model.get_number_of_point_sources()
    n_extended = self._likelihood_model.get_number_of_extended_sources()

    n_planes = len(self._active_planes)
    observed = np.zeros(n_planes, dtype=float)
    expected = np.zeros_like(observed)           # model + background
    source_model = np.zeros_like(observed)       # model without background
    background_subtracted = np.zeros_like(observed)
    err_down = np.zeros_like(observed)
    err_up = np.zeros_like(observed)

    for idx, plane_id in enumerate(self._active_planes):
        plane = self._maptree[plane_id]

        model_sum = np.sum(
            self._get_expectation(plane, plane_id, n_point, n_extended))
        data_sum = np.sum(plane.observation_map.as_partial())
        bkg_sum = np.sum(plane.background_map.as_partial())

        # The background uncertainty is neglected, so the background is
        # treated as part of the model.
        model_plus_bkg = model_sum + bkg_sum

        observed[idx] = data_sum
        background_subtracted[idx] = data_sum - bkg_sum
        source_model[idx] = model_sum
        expected[idx] = model_plus_bkg

        if data_sum >= 50.0:
            # Gaussian limit: symmetric error bar equal to the square root
            # of the observed counts.
            gaussian_sigma = np.sqrt(data_sum)
            err_down[idx] = gaussian_sigma
            err_up[idx] = gaussian_sigma
        else:
            # Low counts: 68% interval (quantiles 0.16 and 1 - 0.16) of a
            # Poisson distribution whose mean is model + background.
            tail = 0.16
            lower_count = poisson.isf(1 - tail, mu=model_plus_bkg)
            upper_count = poisson.isf(tail, mu=model_plus_bkg)
            err_down[idx] = model_plus_bkg - lower_count
            err_up[idx] = upper_count - model_plus_bkg

    root_expected = np.sqrt(expected)
    residuals = old_div(observed - expected, root_expected)

    # NOTE(review): both error lists are ordered [high, low]; confirm that
    # this is the order _plot_spectrum expects.
    residuals_err = [old_div(err_up, root_expected),
                     old_div(err_down, root_expected)]
    yerr = [err_up, err_down]

    return self._plot_spectrum(
        background_subtracted, yerr, source_model, residuals, residuals_err)