def non_central_chi_squared_polynomial_approximation_timing():
    """ We assess the speed of a piecewise polynomial approximation to the non-central chi-squared distribution. """
    nus = [1, 5, 10, 50]
    lambdas = [1, 5, 10, 50]

    n = 10000
    res = {}
    for nu in nus:
        res[nu] = {}
        ncx2_approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(
            nu)
        for l in lambdas:
            u = uniform.rvs(size=n)
            start = timer()
            ncx2.ppf(u, df=nu, nc=l)
            elapsed_ncx2 = (timer() - start) / n
            start = timer()
            ncx2_approx(u, non_centrality=l)
            elapsed_approx = (timer() - start) / n
            res[nu][l] = round((elapsed_ncx2 / elapsed_approx), 1)

    df = pd.DataFrame(res)
    df.index = df.index.rename('lambda')
    df.columns = df.columns.rename('nu')
    print(df)
Example #2
    def fit(self, X, y):
        if self.gpu:
            X = X.cuda()
        assert (np.logical_or(y == 0,
                              y == 1)).all()  # Only binary-classification now.
        self.train_X = X
        self.train_y = y
        self.Xdim = X.shape[1]

        # Determine the bucket boundaries.
        lb = ncx2.ppf(1e-4, self.Xdim * self.bucket_shrink, 0)
        ub = ncx2.ppf(1 - 1e-4, self.Xdim,
                      self.Xdim * self.bucket_shrink / self.sigma2)
        self.buckets = np.linspace(lb, ub, num=self.N_bucket) * self.sigma2
Example #3
def asymptotic_p_value(asimov_q, use_median_rather_than_asimov=False):
    if use_median_rather_than_asimov:
        median_q = ncx2.ppf(0.5, df=2, nc=max(0., asimov_q))
        p_value = chi2.sf(median_q, df=2)
    else:
        p_value = chi2.sf(asimov_q, df=2)
    return p_value
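A minimal usage sketch of asymptotic_p_value; the Asimov test-statistic value below is illustrative, not taken from any particular analysis, and the scipy imports are assumed to be available at module level.

from scipy.stats import chi2, ncx2

asimov_q = 9.0  # hypothetical Asimov value of the test statistic
p_asimov = asymptotic_p_value(asimov_q)
p_median = asymptotic_p_value(asimov_q, use_median_rather_than_asimov=True)
print(p_asimov, p_median)  # both derived from the chi-squared survival function with 2 dof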
Example #4
def qchisq(p, df, ncp=0):
    """
    Calculates the quantile function of the (possibly non-central) chi-squared distribution.
    """
    from scipy.stats import chi2, ncx2
    if ncp == 0:
        result = chi2.ppf(q=p, df=df, loc=0, scale=1)
    else:
        result = ncx2.ppf(q=p, df=df, nc=ncp, loc=0, scale=1)
    return result
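A quick sanity check of qchisq (a sketch with illustrative values): with ncp=0 it reduces to the central chi-squared quantile, and a positive non-centrality shifts the quantile upwards.

from scipy.stats import chi2

p, dof = 0.95, 3
assert qchisq(p, dof) == chi2.ppf(p, df=dof)  # central case agrees with chi2.ppf
print(qchisq(p, dof, ncp=2.0))  # non-central quantile, larger than the central one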
def plot_non_central_chi_squared_polynomial_approximation(
        savefig=False, plot_from_json=True):
    if plot_from_json:
        with open('non_central_chi_squared_linear_approximation.json',
                  "r") as input_file:
            results = json.load(input_file)
        results = {
            k: {x: {float(u): w
                    for u, w in y.items()}
                for x, y in v.items()}
            for k, v in results.items()
        }
    else:
        dof = 1.0
        ncx2_approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(
            dof, n_intervals=4 + 1)
        u = np.concatenate([
            np.linspace(0.0, 1.0, 1000)[:-1],
            np.logspace(-10, -1, 100), 1.0 - np.logspace(-10, -1, 100)
        ])
        u.sort()
        non_centralities = [1.0, 10.0, 20.0]
        results = {non_centrality: {} for non_centrality in non_centralities}
        for non_centrality in results:
            exact, approximate = ncx2.ppf(u, df=dof,
                                          nc=non_centrality), ncx2_approx(
                                              u, non_centrality=non_centrality)
            results[non_centrality]['exact'] = {x: y for x, y in zip(u, exact)}
            results[non_centrality]['approximate'] = {
                x: y
                for x, y in zip(u, approximate)
            }

    plt.clf()
    for non_centrality in results:
        exact, approximate = results[non_centrality]['exact'], results[
            non_centrality]['approximate']
        plt.plot(*zip(*exact.items()), 'k--')
        plt.plot(*zip(*approximate.items()), 'k,')
    plt.plot([], [], 'k--', label=r'$C^{-1}_{\nu}(x;\lambda)$')
    plt.plot([], [], 'k-', label=r'$\tilde{C}^{-1}_{\nu}(x;\lambda)$')
    plt.ylim(0, 50)
    plt.yticks([i for i in range(0, 51, 10)])
    plt.xticks([0, 1])
    plt.xlabel(r'$x$')
    plt.legend(frameon=False)
    if savefig:
        plt.savefig('non_central_chi_squared_linear_approximation.pdf',
                    format='pdf',
                    bbox_inches='tight',
                    transparent=True)
        if not plot_from_json:
            with open('non_central_chi_squared_linear_approximation.json',
                      "w") as output_file:
                output_file.write(json.dumps(results, indent=4))
Example #6
    def predict_lambda_and_percentiles(self, Xnew, lower=5, upper=95):
        """
        Computes mean value of intensity and lower and upper percentiles.
        `lower` and `upper` must be between 0 and 100.
        """
        # f ~ Normal(mean_f, var_f)
        mean_f, var_f = self.predict_f(Xnew)
        # λ = E[f²] = E[f]² + Var[f]
        lambda_mean = mean_f**2 + var_f
        # g = f/√var_f ~ Normal(mean_f/√var_f, 1)
        # g² = f²/var_f ~ χ²(k=1, λ=mean_f²/var_f) non-central chi-squared
        m2ov = mean_f**2 / var_f
        if tf.reduce_any(m2ov > 10e3):
            raise ValueError("scipy.stats.ncx2.ppf() flatlines for nc > 10e3")
        f2ov_lower = ncx2.ppf(lower / 100, df=1, nc=m2ov)
        f2ov_upper = ncx2.ppf(upper / 100, df=1, nc=m2ov)
        # f² = g² * var_f
        lambda_lower = f2ov_lower * var_f
        lambda_upper = f2ov_upper * var_f
        return lambda_mean, lambda_lower, lambda_upper
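A standalone sketch of the same quantile transformation, using hypothetical values for mean_f and var_f (i.e. not the GP posterior computed by the method above).

import numpy as np
from scipy.stats import ncx2

mean_f, var_f = 1.5, 0.25
m2ov = mean_f**2 / var_f                       # non-centrality of f**2 / var_f
lambda_mean = mean_f**2 + var_f                # E[f**2] = E[f]**2 + Var[f]
lambda_lower = ncx2.ppf(0.05, df=1, nc=m2ov) * var_f
lambda_upper = ncx2.ppf(0.95, df=1, nc=m2ov) * var_f
print(lambda_lower, lambda_mean, lambda_upper)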
def plot_non_central_chi_squared_polynomial_approximation(save_figure=False):
    """ Plots a polynomial approximation to the non-central chi-squared. """
    u = linspace(0.0, 1.0, 10000)[:-1]  # Excluding the right end point, where the inverse CDF diverges.
    dof = 1.0
    non_centralities = [1.0, 10.0, 20.0]
    clear_plot()
    ncx2_approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(
        dof, n_intervals=4)
    for non_centrality in non_centralities:
        plot(u, ncx2.ppf(u, df=dof, nc=non_centrality), 'k--')
        plot(u, ncx2_approx(u, non_centrality=non_centrality), 'k,')
    if save_figure:
        savefig(
            'piecewise_polynomial_approximation_of_non_central_chi_squared.pdf',
            format='pdf',
            bbox_inches='tight',
            transparent=True)
def rmse_of_non_central_chi_squared_polynomial_approximations():
    lambdas = [1, 5, 10, 50, 100, 200]
    nus = [1, 5, 10, 50, 100]
    poly_orders = [1, 3, 5]
    n_intervals = 16
    results = {
        poly_order: {nu: {}
                     for nu in nus}
        for poly_order in poly_orders
    }
    for poly_order in poly_orders:
        for nu in nus:
            ncx2_approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(
                dof=nu,
                n_intervals=n_intervals + 1,
                polynomial_order=poly_order)
            discontinuities = sorted(
                [0.5**(i + 2) for i in range(n_intervals)] + [0.5] +
                [1.0 - 0.5**(i + 2) for i in range(n_intervals)])
            for l in lambdas:
                rmse = integrate(lambda u: (ncx2.ppf(
                    u, df=nu, nc=l) - ncx2_approx(u, non_centrality=l))**2,
                                 0,
                                 1,
                                 points=discontinuities,
                                 limit=50 + 10 * len(discontinuities))[0]**0.5
                results[poly_order][nu][l] = rmse

    for poly_order, result in results.items():
        df = pd.DataFrame(result)
        df.index = df.index.rename('lambda')
        df.columns = df.columns.rename('nu')
        print(poly_order, df.min().min(), df.max().max())
        print(round(df, 3))
        print('\n')
        print(
            round(df,
                  3).apply(lambda x: ' & '.join([str(i)
                                                 for i in list(x)]) + r' \\',
                           axis=1))
        print('\n' * 3)
def produce_cox_ingersoll_ross_paths(dt, approximations=None, **kwargs):
    assert isinstance(
        dt, float) and np.isfinite(dt) and dt > 0 and (1.0 / dt).is_integer()
    assert approximations is not None
    # The parameters.
    params = kwargs
    kappa, theta, sigma = params['kappa'], params['theta'], params['sigma']
    T = 1.0
    x_0 = 1.0
    dt = dt * T
    sqrt_t = dt**0.5
    c1 = 4.0 * kappa / (sigma**2 * (1.0 - np.exp(-kappa * dt)))
    c2 = c1 * np.exp(-kappa * dt)
    df = 4.0 * kappa * theta / (sigma**2)

    euler_maruyama_update = lambda x, w, t: x + kappa * (
        theta - x) * t + sigma * np.sqrt(np.fabs(x)) * w
    exact_update = lambda u, x: ncx2.ppf(u, df=df, nc=x * c2) / c1
    approximate_update = lambda u, x, approx: approx(u, non_centrality=x * c2)[
        0] / c1

    x_exact = x_0
    x_euler_maruyama = x_0
    x_approximations = [x_0] * len(approximations)

    n_increments = int(1.0 / dt)

    for n in range(n_increments):
        u = np.random.uniform()
        z = norm.ppf(u)
        dw = sqrt_t * z
        x_euler_maruyama = euler_maruyama_update(x_euler_maruyama, dw, dt)
        x_exact = exact_update(u, x_exact)
        x_approximations = [
            approximate_update(u, x_approximate, approx)
            for approx, x_approximate in zip(approximations, x_approximations)
        ]

    return [x_euler_maruyama, x_exact, *x_approximations]
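A hypothetical driver for produce_cox_ingersoll_ross_paths (the parameter values are illustrative); it assumes the approximation constructor defined below is importable, with its degrees of freedom matching 4 * kappa * theta / sigma**2 for the chosen parameters.

kappa, theta, sigma = 1.0, 1.0, 1.0
dof = 4.0 * kappa * theta / sigma**2  # 4.0 for these parameters
approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(dof)
paths = produce_cox_ingersoll_ross_paths(
    1.0 / 16, approximations=[approx], kappa=kappa, theta=theta, sigma=sigma)
print(paths)  # [Euler-Maruyama, exact, approximate] values at T = 1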
def construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(dof, polynomial_order=1, n_intervals=16, n_interpolating_functions=16):
    """
    Computes a piecewise polynomial approximation to the inverse cumulative distribution function of the
    non-central chi-squared distribution for a fixed number of degrees of freedom. The approximation is
    parametrised by the non-centrality parameter.
    :param dof: Float.
    :param polynomial_order: Int.
    :param n_intervals: Int.
    :param n_interpolating_functions: Int.
    :return: Function.
    """
    interpolation_function = lambda f: f ** 0.5
    interpolation_function_deriv_first = lambda f: 0.5 * f ** -0.5
    interpolation_function_deriv_second = lambda f: -0.25 * f ** -1.5

    # We approximate the function P
    interpolation_function_contour_spacing = 1.0 / (n_interpolating_functions - 1)
    interpolation_values = ([interpolation_function(1.0) - n * interpolation_function_contour_spacing for n in range(n_interpolating_functions - 1)] + [interpolation_function(0)])[::-1]  # interpolation key values
    interpolation_points = [0.0] + [root_scalar(lambda a: interpolation_function(a) - y, x0=0.5, bracket=[0.0, 1.0], fprime=interpolation_function_deriv_first, fprime2=interpolation_function_deriv_second).root for y in interpolation_values[1:-1]] + [1.0]  # non-centrality for interpolating functions
    functions_exact = [None] * n_interpolating_functions  # The exact functions
    functions_exact[0] = norm.ppf  # Limiting case as y -> 0
    # The following odd syntax with y=... ensures y is evaluated at declaration and not taken by reference:
    functions_exact[1:-1] = [lambda u, y=y_interpolation_points: np.sqrt(dof / (4.0 * y)) * (y / dof * ncx2.ppf(u, df=dof, nc=(1.0 - y) * dof / y) - 1.0) for y_interpolation_points in interpolation_points[1:-1]]
    functions_exact[-1] = lambda u: np.sqrt(dof / 4.0) * (1.0 / dof * chi2.ppf(u, df=dof) - 1.0)
    functions_approx = [dyadic_function_approximation_constructor(f, n_intervals, polynomial_order) for f in progressbar(functions_exact)]  # By piecewise dyadic construction

    def construct_linear_interpolation(functions, weightings):
        """
        Builds a linear interpolation between two functions.
        :param functions: List.
        :param weightings: List.
        :return: Function.
        """
        f1, f2 = functions
        w1, w2 = weightings
        return lambda u: f1(u) * w1 + f2(u) * w2

    def get_interpolation_functions_and_weightings(non_centrality):
        """
        Determines the interpolation functions to use and their weights.
        :param non_centrality: Float.
        :return: List.
        """
        interpolation_value = interpolation_function(non_centrality)
        insertion_index = bisect(interpolation_values, interpolation_value, lo=0)
        lower_index, upper_index = insertion_index - 1, insertion_index
        assert lower_index >= 0
        assert upper_index <= len(interpolation_values)
        if upper_index == len(interpolation_values):
            return [[functions_approx[lower_index]] * 2, [1.0, 0.0]]
        functions = [functions_approx[i] for i in [lower_index, upper_index]]
        interpolation_lower, interpolation_upper = [interpolation_values[i] for i in [lower_index, upper_index]]
        w_lower = (interpolation_upper - interpolation_value) / (interpolation_upper - interpolation_lower)
        w_upper = 1.0 - w_lower
        weights = [w_lower, w_upper]
        return [functions, weights]

    def inverse_non_central_chi_squared_interpolated_polynomial_approximation(u, non_centrality):
        """
        Polynomial approximation to the inverse cumulative distribution function for the non-central
        chi-squared distribution
        :param u: Array.
        :param non_centrality: Float.
        :return: Array.
        """
        functions, weightings = get_interpolation_functions_and_weightings(dof / (non_centrality + dof))
        interpolated_function = construct_linear_interpolation(functions, weightings)
        return non_centrality + dof + 2.0 * np.sqrt(non_centrality + dof) * interpolated_function(u)

    return inverse_non_central_chi_squared_interpolated_polynomial_approximation
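A minimal usage sketch of the constructor above (degrees of freedom, polynomial order and non-centrality are illustrative), comparing the approximation against scipy's exact inverse CDF.

import numpy as np
from scipy.stats import ncx2

ncx2_approx = construct_inverse_non_central_chi_squared_interpolated_polynomial_approximation(
    dof=1.0, polynomial_order=3, n_intervals=16)
u = np.linspace(0.01, 0.99, 5)
print(ncx2_approx(u, non_centrality=10.0))  # piecewise polynomial approximation
print(ncx2.ppf(u, df=1.0, nc=10.0))         # exact inverse CDF for comparison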
Example #11
import numpy as np
from scipy.stats import ncx2
from scipy.stats import norm
from scipy.stats import chi2

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

#%%

df = 10_000
theta = 2
nc = np.sqrt(2 * df) * theta

x = np.linspace(ncx2.ppf(0.001, df, nc), ncx2.ppf(0.999, df, nc), 1000)

plt.plot(x, ncx2.pdf(x, df, nc), label='theta = {:.2f}'.format(theta))
plt.legend()
plt.show()

#%%


def sample_nc2x(theta=0, df=100, size=1000):
    nc = theta * np.sqrt(2 * df)
    Y = chi2.rvs(df - 1, size=size)
    Z = norm.rvs(size=size)
    Y += nc + 2 * np.sqrt(nc) * Z + Z**2
    return Y
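A quick check of the decomposition used in sample_nc2x (a sketch; sample size and parameters are illustrative): the empirical mean and variance should be close to df + nc and 2 * (df + 2 * nc), respectively.

theta, dof = 2.0, 100
samples = sample_nc2x(theta=theta, df=dof, size=100_000)
nc = theta * np.sqrt(2 * dof)
print(samples.mean(), dof + nc)           # empirical vs exact mean
print(samples.var(), 2 * (dof + 2 * nc))  # empirical vs exact variance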
Example #12
"""
    Comparison of the non-central chi-squared to the Gaussian.
"""

import pandas as pd
from scipy.stats import uniform, ncx2, norm
from timeit import default_timer as timer

if __name__ == '__main__':
    nus = [1, 5, 10, 50]
    lambdas = [1, 5, 10, 50, 100, 200]

    n = 10000
    res = {}
    for nu in nus:
        res[nu] = {}
        for l in lambdas:
            u = uniform.rvs(size=n)
            start = timer()
            ncx2.ppf(u, df=nu, nc=l)
            elapsed_ncx2 = (timer() - start) / n
            start = timer()
            norm.ppf(u)
            elapsed_norm = (timer() - start) / n
            res[nu][l] = int(elapsed_ncx2 / elapsed_norm)

    df = pd.DataFrame(res)
    df.index = df.index.rename('lambda')
    df.columns = df.columns.rename('nu')
    print(df)
Example #13
    linewidth=0.8)
ax_cdf = ax.twinx()
ax_cdf.set_ylabel('cdf')
ax.set_xlim((0, 40))
ax.xaxis.set_major_locator(MultipleLocator(5))
ax.xaxis.set_minor_locator(MultipleLocator(1))
xlim = ax.get_xlim()
ax_cdf.hist(random,
            density=True,
            bins=cum_bins,
            cumulative=True,
            histtype='step',
            color='black',
            linewidth=0.6)
ax_cdf.plot(x, ncx2.cdf(x, df, nc), c='red', linewidth=0.8)
cl_95 = ncx2.ppf(0.954, df, nc)
cl_99 = ncx2.ppf(0.997, df, nc)
ax_cdf.plot((cl_95, cl_95), (0, 1), c='black', linestyle='-.', linewidth=0.5)
ax_cdf.plot((cl_99, cl_99), (0, 1), c='black', linestyle='--', linewidth=0.5)
ax_cdf.set_ylim((0, 1))
ax_cdf.text(cl_95, 0.5, r'$2\sigma$', rotation=90)
ax_cdf.text(cl_99, 0.5, r'$3\sigma$', rotation=90)

# convert the actual data/random numbers into a pdf and cdf
# n and bins are the histogram counts and bin boundaries, respectively
# works, but does not look great yet; the appearance needs improvement
data_rv = rv_histogram((n, bins))
fig_2 = plt.figure(2, dpi=150)
ax_2 = fig_2.add_subplot(111)
ax_2.xaxis.set_major_locator(MultipleLocator(5))
ax_2.xaxis.set_minor_locator(MultipleLocator(1))
Example #14
import numpy as np
from scipy.stats import ncx2
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

df, nc = 21, 1.06
mean, var, skew, kurt = ncx2.stats(df, nc, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(ncx2.ppf(0.01, df, nc), ncx2.ppf(0.99, df, nc), 100)
ax.plot(x, ncx2.pdf(x, df, nc), 'r-', lw=5, alpha=0.6, label='ncx2 pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = ncx2(df, nc)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = ncx2.ppf([0.001, 0.5, 0.999], df, nc)
np.allclose([0.001, 0.5, 0.999], ncx2.cdf(vals, df, nc))
# True

# Generate random numbers:
Example #15
def docalc(args, data, len_data, sims, len_sims, error):
    """
	# Fitness Calculation Template:
	if set(args.error).issuperset(set(['the-acronym'])):
		1. func = 0

		2. func = an algebraic expression combining the data average (data_avrg), data standard deviation (data_stdv), simulation average (sims_avrg),
		simulation standard deviation (sims_stdv), single experimental files (data.loc[i]), and/or simulation files (sims.loc[j])
		Note1: Perform two for-loops if using data.loc[i] and sims.loc[j].
		Note2: Please consider that these variables are DataFrames, meaning that multiplication and division are methods (e.g. df1.divide(df2))

		3. Drop NaN values (from experimental time points without simulated values, or simulated values without experimental data)
		with dropna(axis = 0, how = 'all').dropna(axis = 1, how = 'all'). Also transform Inf values with replace([numpy.inf, -numpy.inf], numpy.nan)

		4. Sum the two dimensions, and return a number in scientific notation with six decimal places (zero decimal places for statistical tests):
		error['the-acronym'] = '{:.6e}'.format(func.dropna(axis = 0, how = 'all').dropna(axis = 1, how = 'all').sum().sum())
	"""

    if args.do_all:
        args.error = [
            'SDA', 'ADA', 'SSQ', 'CHISQ', 'MNSE', 'PWSD', 'APWSD', 'NPWSD',
            'ANPWSD', 'MWUT', 'WMWET', 'TOST', 'DUT'
        ]
        """
		SDA    : Squared Difference of Averages
		ADA    : Absolute Difference of Averages
		SSQ    : Sum of SQuares
		CHISQ  : Chi-Square (Differences divided by data standard deviation)
		MNSE   : Mean Normalized Square Error (Differences divided by data average)
		PWSD   : Pair-Wise Square Deviation
		APWSD  : Absolute Pair-Wise Deviation
		NPWSD  : Normalized Pair-Wise Square Deviation
		ANPWSD : Absolute Normalized Pair-Wise Deviation
		MWUT   : Mann-Whitney U-test (Mann and Whitney, 1947, DOI 10.1214/aoms/1177730491)
		WMWET  : Wellek's Mann-Whitney Equivalence Test (Wellek 1996, DOI 10.1002/bimj.4710380608)
		TOST   : Two one-sided t-tests (Dunnet and Gent, 1977, DOI 10.2307/2529457, as well other authors)
		DUT    : Double Mann-Whitney U-tests (Reviewed in Cornell, 1990, DOI 10.1080/03610929008830433)

		More information in https://pleione.readthedocs.io/en/latest/ObjectiveFunctions.html
		"""

        data_avrg = doavrg(data, len_data)
        data_stdv = dostdv(data, len_data)

        sims_avrg = doavrg(sims, len_sims)
        sims_stdv = dostdv(sims, len_sims)

    # former mean square error, now square difference of means
    if set(args.error).issuperset(set(['SDA'])) or set(args.error).issuperset(
            set(['MSE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)
            sims_avrg = doavrg(sims, len_sims)

        func = (data_avrg - sims_avrg)**2

        error['SDA'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # former mean absolute error, now absolute value of the difference of means
    if set(args.error).issuperset(set(['ADA'])) or set(args.error).issuperset(
            set(['MAE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)
            sims_avrg = doavrg(sims, len_sims)

        func = abs(data_avrg - sims_avrg)

        error['ADA'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # sum of squares (from BioNetFit paper)
    if set(args.error).issuperset(set(['SSQ'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (data.loc[i] - sims.loc[j])**2

        error['SSQ'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # chi-square (from BioNetFit paper)
    if set(args.error).issuperset(set(['CHISQ'])):
        func = 0

        if not args.do_all:
            data_stdv = dostdv(data, len_data)

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j]).divide(data_stdv))**2

        error['CHISQ'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # mean normalized square error (from BioNetFit paper)
    if set(args.error).issuperset(set(['MNSE'])):
        func = 0

        if not args.do_all:
            data_avrg = doavrg(data, len_data)

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j]).divide(data_avrg))**2

        error['MNSE'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())

    # pair-wise square deviation
    if set(args.error).issuperset(set(['PWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += ((data.loc[i] - sims.loc[j])**2).divide(len_data *
                                                                len_sims)

        error['PWSD'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # pair-wise absolute deviation
    if set(args.error).issuperset(set(['APWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (abs(data.loc[i] - sims.loc[j])).divide(len_data *
                                                                len_sims)

        error['APWSD'] = '{:.6e}'.format(
            func.dropna(axis=0, how='all').dropna(axis=1,
                                                  how='all').sum().sum())

    # normalized pair-wise square deviation (also implemented in BioNetFit as equation 3, but not normalized by the number of data * sims)
    if set(args.error).issuperset(set(['NPWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (((data.loc[i] - sims.loc[j]).divide(
                    data.loc[i]))**2).divide(len_data * len_sims)

        error['NPWSD'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())

    # normalized pair-wise absolute deviation
    if set(args.error).issuperset(set(['ANPWSD'])):
        func = 0

        for i in range(len_data):
            for j in range(len_sims):
                func += (abs((data.loc[i] - sims.loc[j]).divide(
                    data.loc[i]))).divide(len_data * len_sims)

        error['ANPWSD'] = '{:.6e}'.format(
            func.replace([numpy.inf, -numpy.inf], numpy.nan).dropna(
                axis=0, how='all').dropna(axis=1, how='all').sum().sum())
    """
	Wellek's Mann-Whitney Equivalence Test.
	Based on mawi.R script from the EQUIVNONINF package
	modifications done to perform the test "vectorized"
	(it compares two matrices; the first has all exp data, the second all the simulations)
	"""
    if set(args.error).issuperset(set(['WMWET'])):
        from scipy.stats import ncx2
        # useful variables (namespace identical to mawi.R script)
        m = len_data  # x = data
        n = len_sims  # y = sims
        eps1_ = .3129  # Wellek's paper
        eps2_ = .2661  # Wellek's paper
        eqctr = 0.5 + (eps2_ - eps1_) / 2
        eqleng = eps1_ + eps2_

        # estimators needed for calculations
        wxy = pandas.DataFrame(index=sims.loc[0].index,
                               columns=sims.loc[0].columns).fillna(0)
        pihxxy = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)
        pihxyy = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)
        sigmah = pandas.DataFrame(index=sims.loc[0].index,
                                  columns=sims.loc[0].columns).fillna(0)

        # ŷ estimator (wxy in mawi.R)
        # equation 1.2 from Wellek 1996 paper
        # for (i in 1:m) for (j in 1:n) wxy <- wxy + trunc(0.5 * (sign(x[i] - y[j]) + 1))
        for i in range(m):
            for j in range(n):
                diff = (data.loc[i] - sims.loc[j])
                diff = diff.dropna(axis=0, how='all').dropna(axis=1, how='all')
                diff = diff.apply(numpy.sign)
                diff = diff + 1
                diff = diff.multiply(0.5)
                diff = diff.apply(numpy.trunc)
                # add to ŷ (wxy in mawi.R)
                wxy += diff

        # yFFG estimator (pihxxy in mawi.R)
        # equation 2.5a from Wellek 1996 paper
        #for (i1 in 1:(m - 1)) for (i2 in (i1 + 1):m) for (j in 1:n) pihxxy <- pihxxy + trunc(0.5 * (sign(min(x[i1], x[i2]) - y[j]) + 1))
        for xi1 in range(m - 1):
            for xi2 in range(xi1 + 1, m):
                for xj in range(n):
                    diff = data.loc[xi1].where(data.loc[xi1] < data.loc[xi2],
                                               data.loc[xi2]) - sims.loc[xj]
                    diff = diff.dropna(axis=0, how='all').dropna(axis=1,
                                                                 how='all')
                    diff = diff.apply(numpy.sign)
                    diff = diff + 1
                    diff = diff.multiply(0.5)
                    diff = diff.apply(numpy.trunc)
                    # add to yFFG (pihxxy in mawi.R)
                    pihxxy += diff

        # yFGG estimator (pihxyy in mawi.R)
        # equation 2.5b from Wellek 1996 paper
        # for (i in 1:m) for (j1 in 1:(n - 1)) for (j2 in (j1 + 1):n) pihxyy <- pihxyy + trunc(0.5 * (sign(x[i] - max(y[j1], y[j2])) + 1))
        for xi in range(m):
            for xj1 in range(n - 1):
                for xj2 in range(xj1 + 1, n):
                    diff = (data.loc[xi] - sims.loc[xj1].where(
                        sims.loc[xj1] > sims.loc[xj2], sims.loc[xj2]))
                    diff = diff.dropna(axis=0, how='all').dropna(axis=1,
                                                                 how='all')
                    diff = diff.apply(numpy.sign)
                    diff = diff + 1
                    diff = diff.multiply(0.5)
                    diff = diff.apply(numpy.trunc)
                    # add to yFGG (pihxyy in mawi.R)
                    pihxyy += diff

        # in equation 1.2
        wxy = wxy.divide(m * n)
        # in equation 2.5a, inverse of (m choose 2 = 0.5 * (m-1) * m), then divided by n
        pihxxy = pihxxy.multiply(2).divide(m * (m - 1) * n)
        # in equation 2.5b, inverse of (n choose 2 = 0.5 * (n-1) * n), then divided by m
        pihxyy = pihxyy.multiply(2).divide(n * (n - 1) * m)

        # variance estimator sigmah (same name as in mawi.R)
        # equation 2.6 from Wellek 1996 paper
        # sigmah <- sqrt((wxy - (m + n - 1) * wxy^2 + (m - 1) * pihxxy + (n - 1) * pihxyy)/(m * n))
        sigmah = wxy - (wxy**2).multiply(m + n - 1) + pihxxy.multiply(
            m - 1) + pihxyy.multiply(n - 1)
        sigmah = sigmah.divide(m * n)
        sigmah = sigmah**0.5

        # critical value
        # right hand of inequality 2.8 from Wellek 1996 paper
        phi = ((eqleng / 2) / sigmah)**2
        # crit <- sqrt(qchisq(alpha, 1, (eqleng/2/sigmah)^2))
        # C_alpha(phi) is the square root of the alpha-th quantile of the chi-squared distribution with a single degree of freedom and non-centrality parameter phi (computed above)
        crit = pandas.DataFrame(data=ncx2.ppf(0.05, 1, phi),
                                index=sims.loc[0].index,
                                columns=sims.loc[0].columns)**.5

        # compare with Z
        # left hand side of the inequality 2.8 from Wellek 1996 paper
        Z = abs((wxy - eqctr).divide(sigmah))
        z = Z.copy(deep=True)
        """
		we want to maximize the amount of true alternative hypotheses, so
		we purposely changed the values to use the Wellek's test as an objective function to minimize
		"""
        # test the inequality 2.8 from Wellek 1996 paper
        # the test cannot reject null hypothesis: P[X-Y] < .5 - e1 or P[X-Y] > .5 + e2
        Z[z >= crit] = +1.0
        # the null hypothesis is rejected, therefore .5 - e1 < P[X-Y] < .5 + e2
        Z[z < crit] = +0.0

        if args.report:
            print('wxy estimator:\n', wxy, '\n')
            print('pihxxy estimator:\n', pihxxy, '\n')
            print('pihxyy estimator:\n', pihxyy, '\n')
            print('sigmah estimator:\n', sigmah, '\n')
            print('phi matrix:\n', phi, '\n')
            print('critical values:\n', crit, '\n')
            print('Z estimator: \n', Z, '\n')
            print(
                'Wellek\'s test matrix: a zero means data and simulations are equivalent within the threshold\n',
                Z)

        error['WMWET'] = '{:.0f}'.format(Z.sum().sum())

    # the same as WMWET, but written to match Wellek's paper more closely (note the use of the Heaviside function)
    if set(args.error).issuperset(set(['WMWET_paper'])):
        from scipy.stats import ncx2

        # namespace identical to the mawi.R script: x = data, y = sims
        x, y = data, sims
        m, n = len_data, len_sims

        eps1_ = .3129  # Wellek's paper
        eps2_ = .2661  # Wellek's paper
        eqctr = 0.5 + (eps2_ - eps1_) / 2
        eqleng = eps1_ + eps2_

        # estimators needed for calculations
        wxy = pandas.DataFrame(index=y.loc[0].index,
                               columns=y.loc[0].columns).fillna(0)
        pihxxy = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)
        pihxyy = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)
        sigmah = pandas.DataFrame(index=y.loc[0].index,
                                  columns=y.loc[0].columns).fillna(0)

        # ŷ estimator (wxy in mawi.R)
        # for (i in 1:m) for (j in 1:n) wxy <- wxy + trunc(0.5 * (sign(x[i] - y[j]) + 1))
        for i in range(m):
            for j in range(n):
                diff = (x.loc[i] - y.loc[j]).dropna(axis=0, how='all').dropna(
                    axis=1, how='all')
                wxy += numpy.heaviside(diff, 0)

        # yFFG estimator (pihxxy in mawi.R)
        #for (i1 in 1:(m - 1)) for (i2 in (i1 + 1):m) for (j in 1:n) pihxxy <- pihxxy + trunc(0.5 * (sign(min(x[i1], x[i2]) - y[j]) + 1))
        for xi1 in range(m - 1):
            for xi2 in range(xi1 + 1, m):
                for xj in range(n):
                    diff1 = (x.loc[xi1] - y.loc[xj]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    diff2 = (x.loc[xi2] - y.loc[xj]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    pihxxy += numpy.heaviside(diff1, 0) * numpy.heaviside(
                        diff2, 0)

        # yFGG estimator (pihxyy in mawi.R)
        # for (i in 1:m) for (j1 in 1:(n - 1)) for (j2 in (j1 + 1):n) pihxyy <- pihxyy + trunc(0.5 * (sign(x[i] - max(y[j1], y[j2])) + 1))
        for xi in range(m):
            for xj1 in range(n - 1):
                for xj2 in range(xj1 + 1, n):
                    diff1 = (x.loc[xi] - y.loc[xj1]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    diff2 = (x.loc[xi] - y.loc[xj2]).dropna(
                        axis=0, how='all').dropna(axis=1, how='all')
                    pihxyy += numpy.heaviside(diff1, 0) * numpy.heaviside(
                        diff2, 0)

        #
        wxy = wxy.divide(m * n)
        pihxxy = pihxxy.multiply(2).divide(m * (m - 1) * n)
        pihxyy = pihxyy.multiply(2).divide(n * (n - 1) * m)

        # variance estimator sigmah (same name as in mawi.R)
        # sigmah <- sqrt((wxy - (m + n - 1) * wxy^2 + (m - 1) * pihxxy + (n - 1) * pihxyy)/(m * n))
        sigmah = wxy - (wxy**2).multiply(m + n - 1) + pihxxy.multiply(
            m - 1) + pihxyy.multiply(n - 1)
        sigmah = sigmah.divide(m * n)
        sigmah = sigmah**0.5

        # critical value
        # crit <- sqrt(qchisq(alpha, 1, (eqleng/2/sigmah)^2))
        phi = (eqleng / 2 / sigmah)**2
        crit = pandas.DataFrame(data=ncx2.ppf(0.05, 1, phi),
                                index=y.loc[0].index,
                                columns=y.loc[0].columns)**.5

        # compare with Z
        Z = abs((wxy - eqctr).divide(sigmah))
        z = Z.copy(deep=True)
        # the null hypothesis is rejected, therefore .5 - e1 < P[X-Y] < .5 + e2
        Z[z < crit] = +0.0
        # the test cannot reject the null hypothesis: P[X-Y] < .5 - e1 or P[X-Y] > .5 + e2
        Z[z >= crit] = +1.0

        if args.report:
            print('wxy estimator:\n', wxy, '\n')
            print('pihxxy estimator:\n', pihxxy, '\n')
            print('pihxyy estimator:\n', pihxyy, '\n')
            print('sigmah estimator:\n', sigmah, '\n')
            print('phi matrix:\n', phi, '\n')
            print('critical values:\n', crit, '\n')
            print('Z estimator: \n', Z, '\n')
            print(
                'Wellek\'s test matrix: a zero means data and simulations are equivalent within the threshold\n',
                Z)

        error['WMWET_paper'] = '{:.0f}'.format(Z.sum().sum())

    if set(args.error).issuperset(set(['TOST'])):
        print(
            "WARNING: data and/or simulations are not necessarily normally distributed."
        )
        print(
            "As a test bed, we assume data and simulations have unequal standard deviations."
        )
        print(
            "See https://www.statsmodels.org/devel/generated/statsmodels.stats.weightstats.ttost_ind.html for more information"
        )
        from statsmodels.stats.weightstats import ttost_ind

        if not args.do_all:
            data_stdv = dostdv(data, len_data)

        # reshape data and sims to allow calculate the test in a for-loop
        tost_sims = numpy.dstack([sims.loc[x] for x in range(len_sims)])
        # since we operate numpy arrays without labels, we must ensure sims and data indexes and columns have the same order
        index = data.loc[0].index
        columns = data.loc[0].columns
        tost_data = numpy.dstack([
            data.loc[x].reindex(columns=columns, index=index)
            for x in range(len_data)
        ])

        p = numpy.zeros((len(data_stdv.index), len(data_stdv.columns)))
        row = 0
        for x, y, lim in zip(tost_sims, tost_data, data_stdv.values):
            for col, _ in enumerate(data_stdv.columns):
                p[row, col] = ttost_ind(x[col], y[col], -lim[col],
                                        +lim[col])[0]
            row += 1

        # transform the matrix of p-values into a non-rejection DataFrame (a p-value below 5% rejects the TOST null of non-equivalence, so that entry is set to zero)
        p = pandas.DataFrame(index=index, columns=columns, data=p)
        P = p.copy(deep=True)
        P[p >= .05] = +1.0
        P[p < .05] = +0.0

        if args.report:
            print(
                'Two one-sided t-tests matrix: a zero means data and simulations are equivalent within one standard deviation threshold\n',
                P)

        error['TOST'] = '{:.0f}'.format(P.sum().sum())

    # Mann-Whitney U-test
    def mwut(data, sims, alternative):
        ucrit = pandas.read_csv(args.crit,
                                sep=None,
                                engine='python',
                                header=0,
                                index_col=0)
        udata = pandas.DataFrame(index=sims.loc[0].index,
                                 columns=sims.loc[0].columns).fillna(0)
        usims = pandas.DataFrame(index=sims.loc[0].index,
                                 columns=sims.loc[0].columns).fillna(0)

        for i in range(len_data):
            for j in range(len_sims):
                Diff = (data.loc[i] - sims.loc[j]).dropna(
                    axis=0, how='all').dropna(axis=1, how='all')
                diff = Diff.copy(deep=True)
                # transform data
                # if data < sims, count -1.0
                Diff[diff < 0] = -1.0
                # if data > sims, count +1.0
                Diff[diff > 0] = +1.0
                # if data = sims, count +0.5
                Diff[diff == 0] = +0.5
                # count how many times is data < sims (udata and usims are complementary)
                diff = Diff.copy(deep=True)
                udata += Diff[diff == -1.0].fillna(0).divide(-1) + Diff[
                    diff == +0.5].fillna(0)
                usims += Diff[diff == +1.0].fillna(0).divide(+1) + Diff[
                    diff == +0.5].fillna(0)

        if alternative == 'two-sided':
            # bigU is max(udata, usims), where udata and usims are DataFrames
            bigU = udata.where(udata >= usims).fillna(
                usims.where(usims >= udata))
        if alternative == 'less':
            bigU = udata
        if alternative == 'greater':
            bigU = usims

        U = len_data * len_sims - bigU
        u = U.copy(deep=True)
        # U is significant if it is less than or equal to a critical value
        U[u <= ucrit.loc[len_sims, str(len_data)]] = +1.0
        U[u > ucrit.loc[len_sims, str(len_data)]] = +0.0

        if args.report:
            print('U-estimator for data\n', udata, '\n')
            print('U-estimator for sims\n', usims, '\n')
            if alternative == 'two-sided':
                print(
                    'U-test matrix: a one means data and sims are different\n',
                    U, '\n')
            if alternative == 'less':
                print(
                    'U-test matrix: A one means data is smaller than sims (shifted to the right)\n',
                    U, '\n')
            if alternative == 'greater':
                print(
                    'U-test matrix: A one means data is greater than sims (shifted to the left)\n',
                    U, '\n')

        return '{:.0f}'.format(U.sum().sum()), U

    if set(args.error).issuperset(set(['MWUT'])):
        if (len_data >= 3 and len_sims >= 3):
            error['MWUT'] = mwut(data, sims, 'two-sided')[0]
        else:
            error['MWUT'] = str(numpy.nan)

    if set(args.error).issuperset(set(['DUT'])):
        if (len_data >= 3 and len_sims >= 3):
            # set what the user wants
            if args.lower is not None and args.upper is None:
                args.upper = args.lower  # symmetric equivalence interval
            if args.lower is None and args.upper is not None:
                args.lower = args.upper  # symmetric equivalence interval

            if args.lower is None and args.upper is None:
                if not args.do_all:
                    if args.stdv == 'sims':
                        lower = upper = dostdv(sims, len_sims)
                    else:
                        lower = upper = dostdv(data, len_data)
                else:
                    if args.stdv == 'sims':
                        lower = upper = sims_stdv
                    else:
                        lower = upper = data_stdv
            else:
                # user-provided equivalence limits (assumed to be numeric)
                lower = float(args.lower)
                upper = float(args.upper)

            # divide by factor
            lower = lower / float(args.factor)
            upper = upper / float(args.factor)

            # copy simulations to a temporary variable
            tmp = sims

            # test lower limit
            new_sims = []
            for i in range(len_sims):
                new_sims.append(tmp.loc[i] - lower)
            sims = pandas.concat(new_sims, keys=range(len_sims))

            # test data > sims - lower with one-tail U-test
            LB = mwut(data, sims, 'greater')[1]

            # test upper limit
            new_sims = []
            for i in range(len_sims):
                new_sims.append(tmp.loc[i] + upper)
            sims = pandas.concat(new_sims, keys=range(len_sims))

            # test data < sims + upper with one-tail U-test
            UB = mwut(data, sims, 'less')[1]

            # rejection DataFrame (U-test report with ones true alternative hypotheses)
            # both one-sided tests should reject the null hypotheses
            U = LB * UB
            # However, we minimize the number of non-rejected null hypotheses
            # transform U into a non-rejection DataFrame.
            U = numpy.logical_xor(U.values, 1).astype(int)
            U = pandas.DataFrame(index=LB.index, columns=LB.columns, data=U)

            if args.report:
                print(
                    'Double U-test matrix: 1.0 means data and sims are not equivalent if sims are shifted:\n',
                    U, '\n')

            error['DUT'] = '{:.0f}'.format(U.sum().sum())

        else:
            error['DUT'] = str(numpy.nan)