Example #1
import numpy as np
from scipy.stats import poisson


def poisson_test_two_sided_matrix(x, lm):
    # Exact two-sided Poisson test, vectorized over arrays of counts x and
    # rates lm: pl is the tail probability on the side of the observed count,
    # pu the matching probability contributed by the opposite tail.
    pl = np.select(
        [x < lm, x > lm, x == lm],
        [poisson.cdf(x, lm), poisson.sf(x, lm), 1])
    pu = np.select([x < lm, x > lm],
                   [poisson.sf(poisson.isf(pl, lm), lm),
                    poisson.sf(x, lm)])

    return pl + pu
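
A short usage sketch (the counts and rates below are made up for illustration):

import numpy as np

x = np.array([3, 10, 7])          # observed counts (illustrative)
lm = np.array([7.0, 7.0, 7.0])    # expected rates under the null

p_two_sided = poisson_test_two_sided_matrix(x, lm)
print(p_two_sided)                # one two-sided p-value per element; equals 1 where x == lm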
Example #2
    def inv_z_score_poisson(self, lamb: np.ndarray,
                            z: np.ndarray) -> np.ndarray:
        """Invert the Poisson z-score defined as
        z = np.sqrt(2.) * erfcinv(2. * poisson.sf(x, lamb))

        :param lamb: expected background number count from the outer aperture (lambda)
        :param z: z-score of the Poisson map
        :return: number count of observed stars in the inner aperture
        """
        return poisson.isf(0.5 * erfc(z / np.sqrt(2.)), lamb)
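
inv_z_score_poisson is bound to a larger class that is not shown here; the same round trip can be sketched standalone with scipy (the lamb and x arrays are illustrative):

import numpy as np
from scipy.special import erfc, erfcinv
from scipy.stats import poisson

lamb = np.array([5.0, 20.0, 100.0])   # illustrative background rates
x = np.array([9, 27, 115])            # illustrative observed counts

# Forward: survival probability -> z-score, as in the docstring above
z = np.sqrt(2.) * erfcinv(2. * poisson.sf(x, lamb))

# Inverse: recover the counts from the z-scores
x_back = poisson.isf(0.5 * erfc(z / np.sqrt(2.)), lamb)
print(z, x_back)   # x_back should match x up to floating-point rounding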
Example #3
def qq_comp_binom_poiss_comp_determn_poiss(l, p, poisson_mu=4.0, n_sim=1000,
                                           alpha_hats=np.arange(0.0001, 1.0, 1e-3)):
    # Compare the empirical exceedance rates of a compound binomial-Poisson
    # variable against the corresponding quantiles of a deterministically
    # scaled Poisson(poisson_mu).
    alphas = np.zeros(len(alpha_hats))
    isfs = int(l * p) * poisson.isf(alpha_hats, poisson_mu)
    for _ in range(n_sim):
        comp_binom_rv = CompoundPoisson.rvs_s(poisson_mu,
                                              l,
                                              p,
                                              compound='binom')
        alphas += (comp_binom_rv > isfs) / n_sim
    return alphas, alpha_hats
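
CompoundPoisson.rvs_s is project-specific and not shown here. Assuming that compound='binom' means summing a Poisson-distributed number of Binomial(l, p) draws, a minimal standalone sketch would be:

import numpy as np

def compound_binom_poisson_rvs(poisson_mu, l, p, rng=None):
    # Draw N ~ Poisson(poisson_mu), then sum N independent Binomial(l, p) draws
    # (an assumed stand-in for CompoundPoisson.rvs_s(..., compound='binom')).
    if rng is None:
        rng = np.random.default_rng()
    n = rng.poisson(poisson_mu)
    return int(rng.binomial(l, p, size=n).sum())

print([compound_binom_poisson_rvs(4.0, 20, 0.3) for _ in range(5)])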
Example #4
from scipy.stats import poisson


def poisson_test_two_sided(x, lm):
    # Same two-sided Poisson test as in Example #1, written with boolean
    # masks as multiplicative factors instead of np.select.
    pl = (poisson.cdf(x, lm) * (x < lm)
          + poisson.sf(x, lm) * (x > lm)
          + (x == lm))
    pu = (poisson.sf(poisson.isf(pl, lm), lm) * (x < lm)
          + poisson.sf(x, lm) * (x > lm))

    # if x < lm:
    #     assert (pl >= pu / 2).all()
    # if x > lm:
    #     assert (pu >= pl / 2).all()

    return pl + pu
Example #5
import pickle
from collections import Counter

from scipy.stats import poisson


def main():
    with open('../../../../data/pickle/lyrl.db', 'rb') as docs_sr:
        # noinspection PyArgumentList
        docs_data = pickle.load(docs_sr)
        freq_matrix = docs_data.freq_matrix.tocsc()
        coll_freqs = freq_matrix.sum(axis=0).A1
        for i, coll_freq in enumerate(coll_freqs):
            term_freqs = Counter(freq_matrix[:, i].todense().A1)
            lambda_ = coll_freq / len(docs_data.docs)
            # isf(p) gives the smallest x such that sf(x) = 1 - cdf(x) <= p.
            # With p = 1 - 0.99**(1/num_docs) this is the smallest x for which
            # cdf(x)**num_docs >= 0.99, i.e. the frequency that all documents
            # stay at or below with probability at least 0.99 under the
            # Poisson(lambda_) model.
            max_poisson_term_freq = poisson.isf(1 - (1 - 0.01)**(1 / len(docs_data.docs)), lambda_)
            if max(term_freqs.keys()) < max_poisson_term_freq:
                print(docs_data.terms[i])
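
The threshold logic can be checked in isolation (num_docs and lambda_ below are illustrative values, not the corpus's):

from scipy.stats import poisson

num_docs = 23_000   # illustrative corpus size
lambda_ = 0.15      # illustrative per-document Poisson rate for one term

threshold = poisson.isf(1 - 0.99**(1 / num_docs), lambda_)

# Probability that every document's frequency stays at or below the threshold
p_all_below = poisson.cdf(threshold, lambda_) ** num_docs
print(threshold, p_all_below)   # p_all_below should come out >= 0.99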
Example #6
def poisson_cutoff(scores, window, q):
    # q is the upper-tail probability used for the Poisson cutoff.
    import numpy
    from scipy.stats import poisson

    # Histogram the scores into bins of width `window`
    hist, bin_edges = numpy.histogram(scores, range(0, max(scores) + window, window))

    # Locate the modal (most populated) bin
    start, end = 0, 0
    for i in range(len(hist)):
        if hist[i] == max(hist):
            start = bin_edges[i]
            end = bin_edges[i + 1]
            break

    # Mean of the scores falling inside the modal bin
    mean, cc = 0, 0
    for s in scores:
        if start <= s < end:
            mean += s
            cc += 1
    mean = mean / float(cc)

    # Cutoff: the count exceeded with probability q under Poisson(mean)
    return poisson.isf(q, mean), mean
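
A usage sketch with synthetic scores (the data and the choice of q are illustrative):

import numpy as np

rng = np.random.default_rng(0)
scores = rng.poisson(8, size=10_000).tolist()   # synthetic integer scores

cutoff, modal_mean = poisson_cutoff(scores, window=2, q=1e-4)
print(cutoff, modal_mean)   # cutoff sits well above the modal-bin mean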
Example #8
    def display_spectrum(self):
        """
        Make a plot of the current spectrum and its residuals (integrated over space)

        :return: a matplotlib.Figure
        """

        n_point_sources = self._likelihood_model.get_number_of_point_sources()
        n_ext_sources = self._likelihood_model.get_number_of_extended_sources()

        total_counts = np.zeros(len(self._active_planes), dtype=float)
        total_model = np.zeros_like(total_counts)
        model_only = np.zeros_like(total_counts)
        net_counts = np.zeros_like(total_counts)
        yerr_low = np.zeros_like(total_counts)
        yerr_high = np.zeros_like(total_counts)

        for i, energy_id in enumerate(self._active_planes):

            data_analysis_bin = self._maptree[energy_id]

            this_model_map_hpx = self._get_expectation(data_analysis_bin, energy_id, n_point_sources, n_ext_sources)

            this_model_tot = np.sum(this_model_map_hpx)

            this_data_tot = np.sum(data_analysis_bin.observation_map.as_partial())
            this_bkg_tot = np.sum(data_analysis_bin.background_map.as_partial())

            total_counts[i] = this_data_tot
            net_counts[i] = this_data_tot - this_bkg_tot
            model_only[i] = this_model_tot

            this_wh_model = this_model_tot + this_bkg_tot
            total_model[i] = this_wh_model

            if this_data_tot >= 50.0:

                # Gaussian limit
                # Under the null hypothesis the data are distributed as a Gaussian with mu = model
                # and sigma = sqrt(model)
                # NOTE: since we neglect the background uncertainty, the background is part of the
                # model
                yerr_low[i] = np.sqrt(this_data_tot)
                yerr_high[i] = np.sqrt(this_data_tot)

            else:

                # Low-counts
                # Under the null hypothesis the data are distributed as a Poisson distribution with
                # mean = model, plot the 68% confidence interval (quantile=[0.16,1-0.16]).
                # NOTE: since we neglect the background uncertainty, the background is part of the
                # model
                quantile = 0.16
                mean = this_wh_model
                y_low = poisson.isf(1-quantile, mu=mean)
                y_high = poisson.isf(quantile, mu=mean)
                yerr_low[i] = mean-y_low
                yerr_high[i] = y_high-mean

        residuals = old_div((total_counts - total_model), np.sqrt(total_model))
        residuals_err = [old_div(yerr_high, np.sqrt(total_model)),
                         old_div(yerr_low, np.sqrt(total_model))]

        yerr = [yerr_high, yerr_low]

        return self._plot_spectrum(net_counts, yerr, model_only, residuals, residuals_err)
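
The error-bar branch above can be illustrated on its own: for a large expected count the 68% Poisson interval obtained from poisson.isf approaches the symmetric sqrt(N) Gaussian errors, while for small counts it becomes asymmetric (the means below are made up):

import numpy as np
from scipy.stats import poisson

quantile = 0.16
for mean in (3.0, 12.0, 200.0):
    # 68% central interval of Poisson(mean), as in the low-count branch above
    y_low = poisson.isf(1 - quantile, mu=mean)
    y_high = poisson.isf(quantile, mu=mean)
    print(mean, mean - y_low, y_high - mean, np.sqrt(mean))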