Exemplo n.º 1
0
    def test_against_ks_1samp(self, alternative, a):
        """Check that `monte_carlo_test` reproduces the p-value of `ks_1samp`.

        A sample of 30 skew-normal variates with shape ``a`` is tested
        against the standard normal CDF, once directly with
        ``stats.ks_1samp`` and once by Monte Carlo resampling of the KS
        statistic under the normal null.  The statistics must agree, and the
        p-values must agree within ``self.atol`` for the one-sided
        alternatives.
        """
        rng = np.random.default_rng(65723433)

        # The same generator first draws the observed sample and is then
        # handed to the null-distribution sampler below.
        sample = stats.skewnorm.rvs(a=a, size=30, random_state=rng)
        reference = stats.ks_1samp(sample, stats.norm.cdf,
                                   alternative=alternative)

        def statistic1d(x):
            # Only the statistic is used by the Monte Carlo test.
            ks = stats.ks_1samp(x, stats.norm.cdf, mode='asymp',
                                alternative=alternative)
            return ks.statistic

        null_sampler = self.rvs(stats.norm.rvs, rng)
        mc = monte_carlo_test(sample, null_sampler, statistic1d,
                              n_resamples=1000, vectorized=False,
                              alternative=alternative)

        assert_allclose(mc.statistic, reference.statistic)

        # For 'less' the Monte Carlo p-value is compared through its
        # complement; any other alternative only checks the statistic.
        if alternative == 'greater':
            mc_pvalue = mc.pvalue
        elif alternative == 'less':
            mc_pvalue = 1 - mc.pvalue
        else:
            return
        assert_allclose(mc_pvalue, reference.pvalue, atol=self.atol)
Exemplo n.º 2
0
def _generic_sample_test(prior_class, td_class=NotImplemented, n_samples=100000, seed=123, np_cdf=None, **kwargs):
    torch.manual_seed(seed)
    dist = prior_class(torch.Size([n_samples]), **kwargs)
    if np_cdf is None:
        td_dist = td_class(**kwargs)
        @torch.no_grad()
        def np_cdf(x):
            return td_dist.cdf(torch.from_numpy(x)).numpy()
    _, p = stats.ks_1samp(dist().detach(), np_cdf, mode='exact')
    assert p > 0.3
Exemplo n.º 3
0
    def test_multivariate_t_cdf_1dim(self, n_samples=100000):
        """Compare one-dimensional ``MultivariateT`` samples with ``stats.t``.

        Samples are drawn from ``prior.MultivariateT`` with a single event
        dimension and checked with an exact one-sample KS test against the
        SciPy Student-t CDF; the test passes when the p-value exceeds 0.3.
        """
        torch.manual_seed(102)
        loc, scale, df = -0.3, 1.2, 3

        def np_cdf(x):
            # NOTE(review): the reference scale is scale / sqrt(df); this
            # presumably mirrors MultivariateT's parameterisation -- confirm.
            return stats.t.cdf(x, df=df, loc=loc, scale=scale / math.sqrt(df))

        sampler = prior.MultivariateT(torch.Size([n_samples, 1]), loc=loc,
                                      scale_tril=scale, df=df, event_dim=1)
        # Drop the trailing event dimension before handing the draws to SciPy.
        draws = sampler().detach().squeeze(-1)
        _, p_value = stats.ks_1samp(draws, np_cdf, mode='exact')
        assert p_value > 0.3
Exemplo n.º 4
0
def PNLF_fitter(params, data, data_err, obs_comp, M_5007, m_5007, min_stat="KS_1samp", comp_lim=False):
    """LMfit minimisation function.

    Creates a PNLF using the parameters, then compares the PNLF CDF with the
    PNe empirical CDF.

    Parameters
    ----------
    params : [dictionary]
        LMfit parameter instance; the entries "M_star", "dM", "c1", "c2" and
        "c3" are read.
    data : [list / array]
        PNe apparent magnitudes, in [OIII]
    data_err : [list / array]
        Currently unused by this function.
    obs_comp : [list / array]
        Observed completeness profile / ratio for the galaxy
    M_5007 : [list / array]
        Absolute magnitude, in [OIII], array (-4.53 to 0.53).
    m_5007 : [list / array]
        Apparent magnitude, in [OIII], array (26.0 to 31.0).
    min_stat : str
        "chi2" to return the element-wise absolute CDF difference, or
        "KS_1samp" to return the one-sample KS statistic.
    comp_lim : bool
        Only used with ``min_stat="chi2"``. When true, both CDFs are
        restricted to the entries where ``data`` is brighter than the
        faintest ``m_5007`` whose ``obs_comp`` is at least 0.5.

    Returns
    -------
    [list / array] or float
        For "chi2": residual = abs( empirical CDF - PNLF CDF ).
        For "KS_1samp": the KS statistic from ``stats.ks_1samp``.

    Raises
    ------
    ValueError
        If ``min_stat`` is not one of the supported statistics.
    """
    # Fail early instead of silently returning None to the minimiser.
    if min_stat not in ("chi2", "KS_1samp"):
        raise ValueError(
            "min_stat must be 'chi2' or 'KS_1samp', got %r" % (min_stat,))

    M_star = params["M_star"]
    dM = params["dM"]
    c1 = params["c1"]
    c2 = params["c2"]
    c3 = params["c3"]

    PNLF = calc_PNLF(m_star=M_star+dM, mag=M_5007+dM, c_1=c1, c_2=c2, c_3=c3)

    if min_stat == "KS_1samp":
        KS_stat, _pvalue = stats.ks_1samp(
            data, form_PNLF_CDF, args=(PNLF, dM, obs_comp, M_5007, m_5007))
        return KS_stat

    # min_stat == "chi2": both CDFs are needed with or without the cut.
    PNLF_CDF = form_PNLF_CDF(data, PNLF, dM, obs_comp, M_5007, m_5007)
    _, PNe_CDF = ecdf(data)

    if comp_lim:
        completeness_lim_mag = m_5007[obs_comp >= 0.5].max()
        within_limit = data < completeness_lim_mag
        PNLF_CDF = PNLF_CDF[within_limit]
        PNe_CDF = PNe_CDF[within_limit]

    return np.abs(PNe_CDF - PNLF_CDF)
Exemplo n.º 5
0
def dynesty_stats_plot(sampler):
    """
    Plot diagnostic statistics from a dynesty run

    The plotted quantities per iteration are:
    - nc: ``sampler.saved_nc``
    - scale: ``sampler.saved_scale``
    - lifetime: the iteration index minus ``sampler.saved_it``

    When the run is long enough, the last panel holds a histogram of the
    lifetimes compared with a geometric distribution with ``p = 1 / nlive``,
    labelled with the p-value of a one-sample KS test.  The first ``burn``
    iterations (an upper quantile of that geometric distribution) and the
    final ``nlive`` iterations are excluded from the histogram and the test.

    Parameters
    ----------
    sampler
        Object providing ``saved_nc``, ``saved_scale``, ``saved_it`` and
        ``nlive``.

    Returns
    -------
    fig: matplotlib.pyplot.figure.Figure
        Figure handle for the new plot
    axs: matplotlib.pyplot.axes.Axes
        Axes handles for the new plot

    """
    import matplotlib.pyplot as plt
    from scipy.stats import geom, ks_1samp

    fig, axs = plt.subplots(nrows=4, figsize=(8, 8))
    lifetime_ax, hist_ax = axs[-2], axs[-1]

    # The first two panels show the raw per-iteration traces.
    for ax, name in zip(axs, ["nc", "scale"]):
        ax.plot(getattr(sampler, f"saved_{name}"), color="blue")
        ax.set_ylabel(name.title())

    n_iterations = len(sampler.saved_it)
    lifetimes = np.arange(n_iterations) - sampler.saved_it
    lifetime_ax.set_ylabel("Lifetime")

    nlive = sampler.nlive
    expected = geom(p=1 / nlive)
    burn = int(expected.isf(1 / 2 / nlive))
    tail_start = len(lifetimes) - nlive
    tail_iterations = np.arange(tail_start, len(lifetimes))

    if n_iterations > burn + nlive:
        # Grey: burn-in, blue: points used for the test, red: final nlive.
        lifetime_ax.plot(np.arange(0, burn), lifetimes[:burn], color="grey")
        lifetime_ax.plot(
            np.arange(burn, tail_start), lifetimes[burn:-nlive], color="blue"
        )
        lifetime_ax.plot(tail_iterations, lifetimes[-nlive:], color="red")

        settled = lifetimes[burn:-nlive]
        ks_result = ks_1samp(settled, expected.cdf)
        support = np.arange(1, 6 * nlive)
        hist_ax.hist(
            settled,
            bins=np.linspace(0, 6 * nlive, 60),
            histtype="step",
            density=True,
            color="blue",
            label=f"p value = {ks_result.pvalue:.3f}",
        )
        hist_ax.plot(support, expected.pmf(support), color="red")
        hist_ax.set_xlim(0, 6 * nlive)
        hist_ax.legend()
        hist_ax.set_yscale("log")
    else:
        # Too few iterations for the comparison: only draw the lifetimes.
        lifetime_ax.plot(
            np.arange(0, tail_start), lifetimes[:-nlive], color="grey"
        )
        lifetime_ax.plot(tail_iterations, lifetimes[-nlive:], color="red")

    lifetime_ax.set_yscale("log")
    lifetime_ax.set_xlabel("Iteration")
    hist_ax.set_xlabel("Lifetime")
    return fig, axs
Exemplo n.º 6
0
        stacked_pdf,
        lw=2,
        zorder=2,
        c=bpl.color_cycle[2],
    )
    ax.plot(
        radii_plot,
        cat_pdf,
        lw=4,
        zorder=4,
        c=bpl.color_cycle[0],
    )
    # Two-sided one-sample KS test of this catalog's effective radii
    # (the "r_eff_pc" column) against the reference CDF `cdf_func`;
    # index [1] selects the p-value from the result.
    pvalue = stats.ks_1samp(
        cat["r_eff_pc"],
        cdf_func,
        alternative="two-sided",
    )[1]
    # Only catalogs with more than `n_min` entries contribute to the tallies
    # of p-values above the 0.05 and 0.01 thresholds.
    if len(cat) > n_min:
        if pvalue > 0.05:
            n_p_05 += 1
        if pvalue > 0.01:
            n_p_01 += 1

    peak_r = calculate_peak(radii_plot, cat_pdf)

    # put this on the peak plot, point out outliers
    if galaxy in ["ngc7793", "ngc1566"]:
        ax_peak.scatter([peak_r], len(cat), c=bpl.color_cycle[3])
        ax_peak.add_text(
            x=peak_r + 0.1,
Exemplo n.º 7
0
 def time_ks_1samp(self, alternative, mode):
     """Run ``stats.ks_1samp`` on ``self.a`` against the normal CDF.

     The result is discarded; ``alternative`` and ``mode`` are forwarded
     unchanged to ``ks_1samp``.
     """
     options = {"alternative": alternative, "mode": mode}
     stats.ks_1samp(self.a, stats.norm.cdf, **options)
Exemplo n.º 8
0
def _eval_quadratic(fit, y):
    """Evaluate ``fit[0] * y**2 + fit[1] * y + fit[2]``."""
    return fit[0] * (y**2) + fit[1] * y + fit[2]


def _between(values, lower, upper):
    """Return the boolean mask of entries strictly between the two bounds."""
    return (values > lower) & (values < upper)


def search_around_poly(binary_warped, left_fit, right_fit, isFirst):
    """Search for lane pixels around the previous polynomial fits.

    Parameters
    ----------
    binary_warped : array
        Image whose non-zero pixels are the lane candidates.
        (Presumably a 2-D binary bird's-eye image -- confirm with caller.)
    left_fit, right_fit : sequence of 3 numbers
        Coefficients ``(a, b, c)`` of ``x = a*y**2 + b*y + c`` for the left
        and right lane from the previous frame.
    isFirst : bool
        When true, the function returns ``fit_poly_init(binary_warped)``
        directly and does no search.

    Returns
    -------
    The result of ``fit_poly_init`` for the first frame; otherwise the image
    produced by ``cv2.addWeighted`` with the selected pixels coloured and
    the area between the fitted lanes filled.

    Notes
    -----
    The fitted lines are also drawn on the current matplotlib figure with
    ``plt.plot``.
    """
    # First image setup
    if isFirst:
        return fit_poly_init(binary_warped)

    # HYPERPARAMETER
    # Width of the margin around the previous polynomial to search
    margin = 25
    # Width of the strip just outside the margin that is probed for noise
    testMargin = 5

    # Grab activated pixels
    nonzero = binary_warped.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])

    # x position of each previous polynomial at every activated pixel's row
    left_center = _eval_quadratic(left_fit, nonzeroy)
    right_center = _eval_quadratic(right_fit, nonzeroy)

    # Set the area of search based on activated x-values
    # within the +/- margin of our polynomial function
    left_lane_inds = _between(nonzerox,
                              left_center - margin,
                              left_center + margin)
    right_lane_inds = _between(nonzerox,
                               right_center - margin,
                               right_center + margin)

    # Monitor if noise is contributing to lane deviation: probe a strip on
    # the inner side of each window and shift the window while that strip
    # holds more than 1000 pixels.
    # NOTE(review): the loop only continues while the *right* strip is
    # noisy (the ``else`` belongs to the right-lane check) -- kept as is.
    bOptimize = False
    deltaMarginL = 0
    deltaMarginR = 0
    while not bOptimize:
        test_left_lane_inds = _between(
            nonzerox,
            left_center + margin + deltaMarginL,
            left_center + (margin + deltaMarginL + testMargin)).nonzero()[0]
        test_right_lane_inds = _between(
            nonzerox,
            right_center - (margin + deltaMarginR + testMargin),
            right_center - (margin + deltaMarginR)).nonzero()[0]

        if len(test_left_lane_inds) > 1000:
            deltaMarginL += testMargin
            # Fixed: the left window must be shifted by the left offset
            # (the original used deltaMarginR here).
            left_lane_inds = _between(nonzerox,
                                      left_center - margin + deltaMarginL,
                                      left_center + margin + deltaMarginL)
        if len(test_right_lane_inds) > 1000:
            deltaMarginR += testMargin
            right_lane_inds = _between(nonzerox,
                                       right_center - margin - deltaMarginR,
                                       right_center + margin - deltaMarginR)
        else:
            bOptimize = True

    # Check if incoming portion along curve has detected pixel values
    # Experimentally shown to occur when image is blown out
    # NOTE(review): ``bDetected`` is set to True right before the
    # ``if not bDetected`` guard, so this recovery block never runs and the
    # "use old lines" branch below is unreachable as written.  Also note
    # that ``len(...)`` of a boolean mask counts all entries, not only the
    # True ones -- confirm the intent before re-enabling.
    bDetected = True
    if not bDetected:
        closest_left_count = (nonzeroy[left_lane_inds] <
                              (0.2 * binary_warped.shape[0]))
        closest_right_count = (nonzeroy[right_lane_inds] <
                               (0.2 * binary_warped.shape[0]))
        if (len(closest_left_count) < 10000) or (len(closest_right_count) <
                                                 10000):
            bDetected = False
            histogram = np.sum(binary_warped[binary_warped.shape[0] // 2:, :],
                               axis=0)
            histogram[histogram < 15000] = 0
            histogram[:200] = 0
            histogram[1000:] = 0
            # ``np.int`` no longer exists in current NumPy; use the builtin.
            midpoint = int(histogram.shape[0] // 2)
            leftPoint = np.argmax(histogram[:midpoint])
            rightPoint = np.argmax(histogram[midpoint:]) + midpoint
            if (leftPoint > 200) and (rightPoint < 1000) \
                    and (stats.ks_1samp(histogram, stats.norm.cdf, alternative='greater')[0] > 0.49):
                config.falseCount += 1
                if config.falseCount > 2:
                    print('Entered hist')
                    config.falseCount = 0
                    fit_poly_init(binary_warped)

    if bDetected:
        # Again, extract left and right line pixel positions
        leftx = nonzerox[left_lane_inds]
        lefty = nonzeroy[left_lane_inds]
        rightx = nonzerox[right_lane_inds]
        righty = nonzeroy[right_lane_inds]

        # Fit new polynomials
        left_fitx, right_fitx, ploty = fit_poly(binary_warped.shape, leftx,
                                                lefty, rightx, righty)
    else:
        # Use old lines params
        # Use previous curve if large noise forces curve to change rapidly
        left_fit = config.left_fit_global
        right_fit = config.right_fit_global
        ploty = np.linspace(0, binary_warped.shape[0] - 1,
                            binary_warped.shape[0])
        try:
            left_fitx = _eval_quadratic(left_fit, ploty)
            right_fitx = _eval_quadratic(right_fit, ploty)
        except TypeError:
            # Avoids an error if `left` and `right_fit` are still none or incorrect
            print('The function failed to fit a line!')
            left_fitx = 1 * ploty**2 + 1 * ploty
            right_fitx = 1 * ploty**2 + 1 * ploty

    ## Visualization ##
    # Create an image to draw on and an image to show the selection window
    out_img = np.dstack((binary_warped, binary_warped, binary_warped)) * 255
    window_img = np.zeros_like(out_img)
    # Color in left and right line pixels
    out_img[nonzeroy[left_lane_inds], nonzerox[left_lane_inds]] = [255, 0, 0]
    out_img[nonzeroy[right_lane_inds], nonzerox[right_lane_inds]] = [0, 0, 255]

    # Build one polygon spanning from the right line down and back up the
    # left line, in the point format expected by cv2.fillPoly()
    left_line_window_wide = np.array([
        np.flipud(np.transpose(np.vstack([left_fitx - (margin // 8), ploty])))
    ])
    right_line_window_wide = np.array(
        [np.transpose(np.vstack([right_fitx + (margin // 8), ploty]))])
    left_right_combo_pts = np.hstack(
        (right_line_window_wide, left_line_window_wide))

    # Draw the lane onto the warped blank image
    cv2.fillPoly(window_img, np.int_([left_right_combo_pts]), (0, 255, 0))
    result = cv2.addWeighted(out_img, 1, window_img, 0.6, 0)

    # Plot the polynomial lines onto the image
    plt.plot(left_fitx, ploty, color='yellow')
    plt.plot(right_fitx, ploty, color='yellow')
    ## End visualization steps ##

    return result
Exemplo n.º 9
0
    def sanity_checks_DUpdate(self):
        """Validate the inputs and resolve ``self.distribution``.

        Reads ``self.distribution``, ``self.data`` and ``self.calc_func``.
        Sets ``self.variables`` to the positional argument names of
        ``self.calc_func`` and, depending on ``self.distribution``:

        * dict of dicts: every value must contain ``'dist_name'`` (one of
          ``DISTRIBUTIONS``); the remaining items are passed as keyword
          arguments to the matching ``scipy.stats`` distribution and the
          frozen distribution replaces the dict entry.
        * dict of callables: left unchanged.
        * ``'auto'``: for every variable, each distribution in
          ``DISTRIBUTIONS`` is fitted to the matching data column and the
          one with the highest one-sample KS p-value is stored (``None``
          when no distribution could be fitted).

        Raises
        ------
        ValueError
            If ``data`` is neither ``None`` nor a DataFrame, ``calc_func``
            is not callable, a distribution spec is malformed, ``'auto'`` is
            requested without data, or data columns are missing.
        """
        dist_check = isinstance(self.distribution, (str, dict))
        # ``is None`` avoids an element-wise comparison on array-like data.
        data_check = self.data is None or isinstance(self.data, pd.DataFrame)

        if not data_check:
            raise ValueError("'data' should only of type 'pd.DataFrame'")
        # NOTE(review): unreachable as written (data_check is True here);
        # kept so the behaviour for missing inputs is unchanged.
        if not (dist_check or data_check):
            raise ValueError(
                "Either one of the 'data' or 'distribution' should be provided"
            )
        if not callable(self.calc_func):
            raise ValueError("'calc_func' should be a 'callable'")

        # inspect.getargspec was removed in Python 3.11; index 0 of
        # getfullargspec is the same list of positional argument names.
        self.variables = inspect.getfullargspec(self.calc_func)[0]

        if isinstance(self.distribution, dict):
            specs = list(self.distribution.values())
            all_dicts = all(isinstance(v, dict) for v in specs)
            all_callables = all(callable(v) for v in specs)

            if not (all_dicts or all_callables):
                raise ValueError(
                    "The type of value in key:value of distribution should be one of ['dict','function']"
                )

            if all_dicts:
                # Replace every parameter dict by its frozen distribution.
                for var, var_dict in list(self.distribution.items()):
                    if 'dist_name' not in var_dict:
                        raise ValueError(
                            "'dist_name' should be in the parameters.")
                    dist_name = var_dict['dist_name']
                    if dist_name not in DISTRIBUTIONS:
                        raise ValueError(
                            f"{var} x {var_dict['dist_name']}, {var_dict['dist_name']} is not one of the available.."
                        )
                    dist_kwargs = {
                        key: value
                        for key, value in var_dict.items()
                        if key != 'dist_name'
                    }
                    self.distribution[var] = getattr(st,
                                                     dist_name)(**dist_kwargs)

        elif isinstance(self.distribution,
                        str) and self.distribution == 'auto':
            if self.data is None:
                # Previously failed later with an AttributeError on None.
                raise ValueError(
                    "'data' must be provided when 'distribution' is 'auto'")

            columns = self.data.columns.tolist()
            missing_cols = [k for k in self.variables if k not in columns]
            if missing_cols:
                raise ValueError(
                    f"'data' doesnt contain {missing_cols} which are there in the 'formulae'"
                )

            self.distribution = {}
            # Find the best probability distribution fit for the variables
            for evar in self.variables:
                data_vals = self.data[evar].values
                max_pvalue = 0
                max_pvalue_dist = None

                for each_distname in DISTRIBUTIONS:
                    check_dist = getattr(st, each_distname)
                    try:
                        check_distparams = check_dist.fit(data_vals)
                        _, pvalue = st.ks_1samp(data_vals,
                                                check_dist.cdf,
                                                args=check_distparams)
                        if pvalue > max_pvalue:
                            max_pvalue = pvalue
                            max_pvalue_dist = check_dist(*check_distparams)
                    except Exception:
                        # Best effort: a distribution that cannot be fitted
                        # or tested is skipped.
                        continue
                self.distribution[evar] = max_pvalue_dist
Exemplo n.º 10
0
 def statistic1d(x):
     """Return the one-sample KS statistic of ``x`` against the standard normal CDF.

     ``alternative`` is taken from the enclosing scope.
     """
     ks_result = stats.ks_1samp(x, stats.norm.cdf, mode='asymp',
                                alternative=alternative)
     return ks_result.statistic
Exemplo n.º 11
0
# Standardize the raw series (zero mean, unit standard deviation) and sort it
# so it can be plotted against evenly spaced CDF values below.
spx_close_unadjusted = (spx_close - spx_close.mean()) / spx_close.std()
spx_close_unadjusted = spx_close_unadjusted.sort_values()

# Normalization with VIX-adjustment: divide by the VIX series first, then
# standardize and sort in the same way.
df = spx_close / vix_close
df = (df - df.mean()) / df.std()
df_sorted = df.sort_values()

# For each threshold, print the average of the empirical upper- and lower-tail
# frequencies next to the one-sided tail probability of a standard normal.
for x in (1.0, 1.3, 1.7, 2.0, 2.3, 2.7, 3.0, 3.3, 3.7, 4.0):
    print(
        "The actual estimated probability of deviation beyond %3.1f standard deviations is %10.8f vs. the theoretical probability of %10.8f"
        % (x, (sum(df > x) / df.shape[0] + sum(df < -x) / df.shape[0]) / 2.0,
           1.0 - norm.cdf(x)))

# Normality tests on the VIX-adjusted, standardized values.
print("Shapiro test: ", shapiro(df_sorted))
print("Kolmogorov-Smirnov test: ", ks_1samp(df_sorted, norm.cdf))

# Normal CDF evaluated at the sorted values, and evenly spaced values in
# [0, 1] used as the empirical CDF of the sorted data.
normal_quantiles = norm.cdf(df_sorted)
real_quantiles = np.linspace(0.0, 1.0, df_sorted.shape[0])

# The three curves are drawn in the same order as the legend entries.
plt.plot(spx_close_unadjusted, real_quantiles, color='#e3526e')
plt.plot(df_sorted, normal_quantiles, color='#3e084c')
plt.plot(df_sorted, real_quantiles, color='#025d93')
plt.legend(["Unadjusted returns", "Normal", "VIX-adjusted returns"],
           loc="lower right")
plt.title('CDFs')
plt.xlabel('Normalized daily S&P 500 log returns')
plt.ylabel('Quantiles')
plt.show()

plt.scatter(norm.ppf(real_quantiles),
Exemplo n.º 12
0
# Normalize the weights to sum to one and scale them to `tot`.
prob = prob / np.sum(prob)
sin_hist = tot * prob

# Selected inclinations, converted to plain floats in radians.
incl = dataB3['inclination_B3'][ind]
incl_rad = incl.to(u.radian).value
# NOTE(review): operator precedence makes this (incl_rad % np.pi) / 2, not
# incl_rad % (np.pi / 2) -- confirm which one is intended.
incl_filtered = incl_rad % np.pi / 2

# Reference sample: inverse-CDF draw from the distribution whose CDF is
# 1 - cos(y), i.e. the same CDF used in the second one-sample test below.
x = np.random.rand(10000)
y = np.arccos(1 - x)

# Two-sample KS test: measured inclinations vs. the simulated reference sample
Dval, pval = ks_2samp(incl_rad, y)
print('2 sample KS statistic: %f, p value: %f' % (Dval, pval))

# One-sample KS test: cos(inclination) against the uniform CDF
Dval, pval = ks_1samp(np.cos(incl_rad), uniform.cdf)
print('1 sample KS statistic: %f, p value: %f' % (Dval, pval))

# One-sample KS test: inclination against the CDF 1 - cos(x)
Dval, pval = ks_1samp(incl_rad, lambda x: 1 - np.cos(x))
print('1 sample KS statistic: %f, p value: %f' % (Dval, pval))

# Both axes are masked with the NaN mask of the *minor* axis column, so the
# two selections keep the same rows.
minor = data['fwhm_min_deconv_B3'][np.isnan(data['fwhm_min_deconv_B3']) ==
                                   False]
major = data['fwhm_maj_deconv_B3'][np.isnan(data['fwhm_min_deconv_B3']) ==
                                   False]
# Inclination computed from the deconvolved axis ratio, then tested the same
# way as above (cos(inclination) against the uniform CDF).
inclination = np.arccos(minor / major)
print(ks_1samp(np.cos(inclination), uniform.cdf))

#KDE
#cos_grid = np.linspace(0,1,100)
Exemplo n.º 13
0
 def ks_test(self):
     """Return the one-sample KS test result of ``self.X`` against ``self.cdf``."""
     sample, reference_cdf = self.X, self.cdf
     return stats.ks_1samp(sample, reference_cdf)