Exemplo n.º 1
0
    def test_homogeneity(self):
        """Test whether the means of all samples are the same

        currently no options, test uses chisquare distribution
        default might change depending on `use_t`

        Returns
        -------
        res : HolderTuple instance
            The results include the following attributes:

            - statistic : float
                Test statistic, ``q`` in meta-analysis, this is the
                pearson_chi2 statistic for the fixed effects model.
            - pvalue : float
                P-value based on chisquare distribution.
            - df : float
                Degrees of freedom, equal to number of studies or samples
                minus 1.
        """
        pvalue = stats.chi2.sf(self.q, self.k - 1)
        res = HolderTuple(statistic=self.q,
                          pvalue=pvalue,
                          df=self.k - 1,
                          distr="chi2")
        return res
Exemplo n.º 2
0
    def test_prob_superior(self, value=0.5, alternative="two-sided"):
        """test for superiority probability

        H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value

        The alternative is that the probability is either not equal, larger
        or smaller than the null-value depending on the chosen alternative.

        Parameters
        ----------
        value : float
            Value of the probability under the Null hypothesis.
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

               * 'two-sided' : H1: ``prob - value`` not equal to 0.
               * 'larger' :   H1: ``prob - value > 0``
               * 'smaller' :  H1: ``prob - value < 0``

        Returns
        -------
        res : HolderTuple
            HolderTuple instance with the following main attributes

            statistic : float
                Test statistic for z- or t-test
            pvalue : float
                Pvalue of the test based on either normal or t distribution.

        """

        p0 = value  # alias
        # diff = self.prob1 - p0  # for reporting, not used in computation
        # TODO: use var_prob
        std_diff = np.sqrt(self.var / self.nobs)

        # TODO: return HolderTuple
        # corresponds to a one-sample test and either p0 or diff could be used
        if not self.use_t:
            stat, pv = _zstat_generic(self.prob1,
                                      p0,
                                      std_diff,
                                      alternative,
                                      diff=0)
            distr = "normal"
        else:
            stat, pv = _tstat_generic(self.prob1,
                                      p0,
                                      std_diff,
                                      self.df,
                                      alternative,
                                      diff=0)
            distr = "t"

        res = HolderTuple(statistic=stat,
                          pvalue=pv,
                          df=self.df,
                          distribution=distr)
        return res
Exemplo n.º 3
0
def test_cov_blockdiagonal(cov, nobs, block_len):
    r"""One sample hypothesis test that covariance is block diagonal.

    The Null and alternative hypotheses are

    .. math::

       H0 &: \Sigma = diag(\Sigma_i) \\
       H1 &: \Sigma \neq diag(\Sigma_i)

    where :math:`\Sigma_i` are covariance blocks with unspecified values.

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance
    block_len : list
        list of length of each square block

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    References
    ----------
    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """
    cov = np.asarray(cov)
    cov_blocks = _get_blocks(cov, block_len)[0]
    k = cov.shape[0]
    k_blocks = [c.shape[0] for c in cov_blocks]
    if k != sum(k_blocks):
        msg = "sample covariances and blocks do not have matching shape"
        raise ValueError(msg)
    logdet_blocks = sum(_logdet(c) for c in cov_blocks)
    a2 = k**2 - sum(ki**2 for ki in k_blocks)
    a3 = k**3 - sum(ki**3 for ki in k_blocks)

    statistic = (nobs - 1 - (2 * a3 + 3 * a2) / (6. * a2))
    statistic *= logdet_blocks - _logdet(cov)

    df = a2 / 2
    pvalue = stats.chi2.sf(statistic, df)
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="block-diagonal"
                       )
Exemplo n.º 4
0
def cov_test(cov, nobs, cov_null):
    """One sample hypothesis test for covariance equal to null covariance

    The Null hypothesis is that cov = cov_null, against the alternative that
    it is not equal to cov_null

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance
    cov_null : nd_array
        covariance under the null hypothesis

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    References
    ----------
    Bartlett, M. S. 1954. “A Note on the Multiplying Factors for Various Χ2
    Approximations.” Journal of the Royal Statistical Society. Series B
    (Methodological) 16 (2): 296–98.

    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.

    """
    # using Stata formulas where cov_sample use nobs in denominator
    # Bartlett 1954 has fewer terms

    S = np.asarray(cov) * (nobs - 1) / nobs
    S0 = np.asarray(cov_null)
    k = cov.shape[0]
    n = nobs

    fact = nobs - 1.
    fact *= 1 - (2 * k + 1 - 2 / (k + 1)) / (6 * (n - 1) - 1)
    fact2 = logdet(S0) - logdet(n / (n - 1) * S)
    fact2 += np.trace(n / (n - 1) * np.linalg.solve(S0, S)) - k
    statistic = fact * fact2
    df = k * (k + 1) / 2
    pvalue = stats.chi2.sf(statistic, df)
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="equal value",
                       cov_null=cov_null
                       )
Exemplo n.º 5
0
    def tost_prob_superior(self, low, upp):
        '''test of stochastic (non-)equivalence of p = P(x1 > x2)

        Null hypothesis:  p < low or p > upp
        Alternative hypothesis:  low < p < upp

        where p is the probability that a random draw from the population of
        the first sample has a larger value than a random draw from the
        population of the second sample, specifically

            p = P(x1 > x2) + 0.5 * P(x1 = x2)

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the probability p that distribution 1 is stochastically
        superior to distribution 2 is outside of the interval given by
        thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        res : HolderTuple
            HolderTuple instance with the following main attributes

            pvalue : float
                Pvalue of the equivalence test given by the larger pvalue of
                the two one-sided tests.
            statistic : float
                Test statistic of the one-sided test that has the larger
                pvalue.
            results_larger : HolderTuple
                Results instanc with test statistic, pvalue and degrees of
                freedom for lower threshold test.
            results_smaller : HolderTuple
                Results instanc with test statistic, pvalue and degrees of
                freedom for upper threshold test.

        '''

        t1 = self.test_prob_superior(low, alternative='larger')
        t2 = self.test_prob_superior(upp, alternative='smaller')

        # idx_max = 0 if t1.pvalue < t2.pvalue else 1
        idx_max = np.asarray(t1.pvalue < t2.pvalue, int)
        title = "Equivalence test for Prob(x1 > x2) + 0.5 Prob(x1 = x2) "
        res = HolderTuple(
            statistic=np.choose(idx_max, [t1.statistic, t2.statistic]),
            # pvalue=[t1.pvalue, t2.pvalue][idx_max], # python
            # use np.choose for vectorized selection
            pvalue=np.choose(idx_max, [t1.pvalue, t2.pvalue]),
            results_larger=t1,
            results_smaller=t2,
            title=title)
        return res
Exemplo n.º 6
0
def test_cov_spherical(cov, nobs):
    r"""One sample hypothesis test that covariance matrix is spherical

    The Null and alternative hypotheses are

    .. math::

       H0 &: \Sigma = \sigma I \\
       H1 &: \Sigma \neq \sigma I

    where :math:`\sigma_i` is the common variance with unspecified value.

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    References
    ----------
    Bartlett, M. S. 1954. “A Note on the Multiplying Factors for Various Χ2
    Approximations.” Journal of the Royal Statistical Society. Series B
    (Methodological) 16 (2): 296–98.

    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """

    # unchanged Stata formula, but denom is cov cancels, AFAICS
    # Bartlett 1954 correction factor in IIIc
    cov = np.asarray(cov)
    k = cov.shape[0]

    statistic = nobs - 1 - (2 * k**2 + k + 2) / (6 * k)
    statistic *= k * np.log(np.trace(cov)) - _logdet(cov) - k * np.log(k)
    df = k * (k + 1) / 2 - 1
    pvalue = stats.chi2.sf(statistic, df)
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="spherical"
                       )
Exemplo n.º 7
0
def test_cov_diagonal(cov, nobs):
    r"""One sample hypothesis test that covariance matrix is diagonal matrix.

    The Null and alternative hypotheses are

    .. math::

       H0 &: \Sigma = diag(\sigma_i) \\
       H1 &: \Sigma \neq diag(\sigma_i)

    where :math:`\sigma_i` are the variances with unspecified values.

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    References
    ----------
    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """
    cov = np.asarray(cov)
    k = cov.shape[0]
    R = cov2corr(cov)

    statistic = -(nobs - 1 - (2 * k + 5) / 6) * _logdet(R)
    df = k * (k - 1) / 2
    pvalue = stats.chi2.sf(statistic, df)
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="diagonal"
                       )
Exemplo n.º 8
0
def test_poisson_zeros(results):
    """Test for excess zeros in Poisson regression model.

    The test is implemented following Tang and Tang [1]_ equ. (12) which is
    based on the test derived in He et al 2019 [2]_.

    References
    ----------

    .. [1] Tang, Yi, and Wan Tang. 2018. “Testing Modified Zeros for Poisson
           Regression Models:” Statistical Methods in Medical Research,
           September. https://doi.org/10.1177/0962280218796253.

    .. [2] He, Hua, Hui Zhang, Peng Ye, and Wan Tang. 2019. “A Test of Inflated
           Zeros for Poisson Regression Models.” Statistical Methods in
           Medical Research 28 (4): 1157–69.
           https://doi.org/10.1177/0962280217749991.

    """
    x = results.model.exog
    mean = results.predict()
    prob0 = np.exp(-mean)
    counts = (results.model.endog == 0).astype(int)
    diff = counts.sum() - prob0.sum()
    var1 = prob0 @ (1 - prob0)
    pm = prob0 * mean
    c = np.linalg.inv(x.T * mean @ x)
    pmx = pm @ x
    var2 = pmx @ c @ pmx
    var = var1 - var2
    statistic = diff / np.sqrt(var)

    pvalue_two = 2 * stats.norm.sf(np.abs(statistic))
    pvalue_upp = stats.norm.sf(statistic)
    pvalue_low = stats.norm.cdf(statistic)

    res = HolderTuple(
        statistic=statistic,
        pvalue=pvalue_two,
        pvalue_smaller=pvalue_upp,
        pvalue_larger=pvalue_low,
        chi2=statistic**2,
        pvalue_chi2=stats.chi2.sf(statistic**2, 1),
        df_chi2=1,
        distribution="normal",
    )
    return res
Exemplo n.º 9
0
def test_poisson_zeroinflation_broek(results_poisson):
    """score test for zero modification in Poisson, special case

    This assumes that the Poisson model has a constant and that
    the zero modification probability is constant.

    This is a special case of test_poisson_zeroinflation derived by
    van den Broek 1995.

    The test reports two sided and one sided alternatives based on
    the normal distribution of the test statistic.

    References
    ----------
    .. [1] Broek, Jan van den. 1995. “A Score Test for Zero Inflation in a
           Poisson Distribution.” Biometrics 51 (2): 738–43.
           https://doi.org/10.2307/2532959.

    """

    mu = results_poisson.predict()
    prob_zero = np.exp(-mu)
    endog = results_poisson.model.endog
    # nobs = len(endog)
    # score =  ((endog == 0) / prob_zero).sum() - nobs
    # var_score = (1 / prob_zero).sum() - nobs - endog.sum()
    score = (((endog == 0) - prob_zero) / prob_zero).sum()
    var_score = ((1 - prob_zero) / prob_zero).sum() - endog.sum()
    statistic = score / np.sqrt(var_score)
    pvalue_two = 2 * stats.norm.sf(np.abs(statistic))
    pvalue_upp = stats.norm.sf(statistic)
    pvalue_low = stats.norm.cdf(statistic)

    res = HolderTuple(
        statistic=statistic,
        pvalue=pvalue_two,
        pvalue_smaller=pvalue_upp,
        pvalue_larger=pvalue_low,
        chi2=statistic**2,
        pvalue_chi2=stats.chi2.sf(statistic**2, 1),
        df_chi2=1,
        distribution="normal",
    )
    return res
Exemplo n.º 10
0
def test_mvmean_2indep(data1, data2):
    """Hotellings test for multivariate mean in two independent samples

    The null hypothesis is that both samples have the same mean.
    The alternative hypothesis is that means differ.

    Parameters
    ----------
    data1 : array_like
        first sample data with observations in rows and variables in columns
    data2 : array_like
        second sample data with observations in rows and variables in columns

    Returns
    -------
    results : instance of a results class with attributes
        statistic, pvalue, t2 and df
    """
    x1 = array_like(data1, "x1", ndim=2)
    x2 = array_like(data2, "x2", ndim=2)
    nobs1, k_vars = x1.shape
    nobs2, k_vars2 = x2.shape
    if k_vars2 != k_vars:
        msg = "both samples need to have the same number of columns"
        raise ValueError(msg)
    mean1 = x1.mean(0)
    mean2 = x2.mean(0)
    cov1 = np.cov(x1, rowvar=False, ddof=1)
    cov2 = np.cov(x2, rowvar=False, ddof=1)
    nobs_t = nobs1 + nobs2
    combined_cov = ((nobs1 - 1) * cov1 + (nobs2 - 1) * cov2) / (nobs_t - 2)
    diff = mean1 - mean2
    t2 = (nobs1 * nobs2) / nobs_t * diff @ np.linalg.solve(combined_cov, diff)
    factor = ((nobs_t - 2) * k_vars) / (nobs_t - k_vars - 1)
    statistic = t2 / factor
    df = (k_vars, nobs_t - 1 - k_vars)
    pvalue = stats.f.sf(statistic, df[0], df[1])
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       t2=t2,
                       distr="F")
Exemplo n.º 11
0
def test_mvmean(data, mean_null=0, return_results=True):
    """Hotellings test for multivariate mean in one sample

    Parameters
    ----------
    data : array_like
        data with observations in rows and variables in columns
    mean_null : array_like
        mean of the multivariate data under the null hypothesis
    return_results : bool
        If true, then a results instance is returned. If False, then only
        the test statistic and pvalue are returned.

    Returns
    -------
    results : instance of a results class with attributes
        statistic, pvalue, t2 and df
    (statistic, pvalue) : tuple
        If return_results is false, then only the test statistic and the
        pvalue are returned.

    """
    x = np.asarray(data)
    nobs, k_vars = x.shape
    mean = x.mean(0)
    cov = np.cov(x, rowvar=False, ddof=1)
    diff = mean - mean_null
    t2 = nobs * diff.dot(np.linalg.solve(cov, diff))
    factor = (nobs - 1) * k_vars / (nobs - k_vars)
    statistic = t2 / factor
    df = (k_vars, nobs - k_vars)
    pvalue = stats.f.sf(statistic, df[0], df[1])
    if return_results:
        res = HolderTuple(statistic=statistic,
                          pvalue=pvalue,
                          df=df,
                          t2=t2,
                          distr="F")
        return res
    else:
        return statistic, pvalue
def test_holdertuple2():
    ht = HolderTuple(tuple_=("statistic", "extra"), statistic=5, pvalue=0.1,
                     text="just something", extra=[1, 2, 4])
    assert_equal(len(ht), 2)
    assert_equal(ht[:], [5, [1, 2, 4]])
    p, v = ht
    assert_equal([p, v], [5, [1, 2, 4]])
    p, v = ht[0], ht[1]
    assert_equal([p, v], [5, [1, 2, 4]])
    assert_equal(list(ht), [5, [1, 2, 4]])

    x = np.asarray(ht, dtype=object)
    assert_equal(x, np.asarray([5, [1, 2, 4]], dtype=object))
    assert_equal(x.dtype, np.dtype('O'))
    # assert_equal(pd.Series(ht).values, [5, [1, 2, 4]])
    # assert_equal(pd.Series(ht).dtype, np.dtype('O'))

    assert_equal(ht.statistic, 5)
    assert_equal(ht.pvalue, 0.1)
    assert_equal(ht.extra, [1, 2, 4])
    assert_equal(ht.text, "just something")
def test_holdertuple():
    ht = HolderTuple(statistic=5, pvalue=0.1, text="just something",
                     extra=[1, 2, 4])
    assert_equal(len(ht), 2)
    assert_equal(ht[:], [5, 0.1])
    p, v = ht
    assert_equal([p, v], [5, 0.1])
    p, v = ht[0], ht[1]
    assert_equal([p, v], [5, 0.1])
    assert_equal(list(ht), [5, 0.1])
    assert_equal(np.asarray(ht), [5, 0.1])
    assert_equal(np.asarray(ht).dtype, np.float64)
    x = np.zeros((2, 2))
    x[0] = ht
    assert_equal(x, [[5, 0.1], [0, 0]])

    assert_equal(pd.Series(ht).values, [5, 0.1])
    assert_equal(pd.DataFrame([ht, ht]).values, [[5, 0.1], [5, 0.1]])

    assert_equal(ht.statistic, 5)
    assert_equal(ht.pvalue, 0.1)
    assert_equal(ht.extra, [1, 2, 4])
    assert_equal(ht.text, "just something")
Exemplo n.º 14
0
def test_mvmean_2indep(data1, data2):
    """Hotellings test for multivariate mean in two samples

    Parameters
    ----------
    data1 : array_like
        first sample data with observations in rows and variables in columns
    data2 : array_like
        second sample data with observations in rows and variables in columns

    Returns
    -------
    results : instance of a results class with attributes
        statistic, pvalue, t2 and df
    """
    x1 = array_like(data1, "x1", ndim=2)
    x2 = array_like(data2, "x2", ndim=2)
    nobs_x, k_vars = x1.shape
    nobs_y, k_vars = x2.shape
    mean_x = x1.mean(0)
    mean_y = x2.mean(0)
    cov_x = np.cov(x1, rowvar=False, ddof=1)
    cov_y = np.cov(x2, rowvar=False, ddof=1)
    nobs_t = nobs_x + nobs_y
    combined_cov = ((nobs_x - 1) * cov_x + (nobs_y - 1) * cov_y) / (nobs_t - 2)
    diff = mean_x - mean_y
    t2 = (nobs_x * nobs_y) / nobs_t * diff @ (np.linalg.solve(combined_cov, diff))
    factor = ((nobs_t - 2) * k_vars) / (nobs_t - k_vars - 1)
    statistic = t2 / factor
    df = (k_vars, nobs_t - 1 - k_vars)
    pvalue = stats.f.sf(statistic, df[0], df[1])
    return HolderTuple(statistic=statistic,
                      pvalue=pvalue,
                      df=df,
                      t2=t2,
                      distr="F")
Exemplo n.º 15
0
def equivalence_oneway_generic(f_stat, n_groups, nobs, equiv_margin, df,
                               alpha=0.05, margin_type="f2"):
    """Equivalence test for oneway anova (Wellek and extensions)

    Warning: eps is currently defined as in Wellek, but will change to
    signal to noise ration (Cohen's f family)

    The null hypothesis is that the means differ by more than `eps` in the
    anova distance measure.
    If the Null is rejected, then the data supports that means are equivalent,
    i.e. within a given distance.

    Parameters
    ----------
    f, n_groups, nobs, eps, df, alpha

    Returns
    -------
    results : instance of a Holder class



    Notes
    -----
    Equivalence in this function is defined in terms of a squared distance
    measure similar to Mahalanobis distance.
    Alternative definitions for the oneway case are based on maximum difference
    between pairs of means or similar pairwise distances.

    The equivalence margin is used for the noncentrality parameter in the
    noncentral F distribution for the test statistic. In samples with unequal
    variances estimated using Welch or Brown-Forsythe Anova, the f-statistic
    depends on the unequal variances and corrections to the test statistic.
    This means that the equivalence margins are not fully comparable across
    methods for treating unequal variances.

    References
    ----------
    Wellek book

    Cribbie, Robert A., Chantal A. Arpin-Cribbie, and Jamie A. Gruman. 2009.
    “Tests of Equivalence for One-Way Independent Groups Designs.” The Journal
    of Experimental Education 78 (1): 1–13.
    https://doi.org/10.1080/00220970903224552.

    Jan, Show-Li, and Gwowen Shieh. 2019. “On the Extended Welch Test for
    Assessing Equivalence of Standardized Means.” Statistics in
    Biopharmaceutical Research 0 (0): 1–8.
    https://doi.org/10.1080/19466315.2019.1654915.

    """
    nobs_t = nobs.sum()
    nobs_mean = nobs_t / n_groups

    if margin_type == "wellek":
        nc_null = nobs_mean * equiv_margin**2
        es = f_stat * (n_groups - 1) / nobs_mean
        type_effectsize = "Wellek's psi_squared"
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        nc_null = nobs_t * equiv_margin
        es = f_stat / nobs_t
        type_effectsize = "Cohen's f_squared"
    else:
        raise ValueError('`margin_type` should be "f2" or "wellek"')
    crit_f = stats.ncf.ppf(alpha, df[0], df[1], nc_null)

    if margin_type == "wellek":
        # TODO: do we need a sqrt
        crit_es = crit_f * (n_groups - 1) / nobs_mean
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        crit_es = crit_f / nobs_t

    reject = (es < crit_es)

    pv = stats.ncf.cdf(f_stat, df[0], df[1], nc_null)
    pwr = stats.ncf.cdf(crit_f, df[0], df[1], 1e-13)  # scipy, cannot be 0
    res = HolderTuple(statistic=f_stat,
                      pvalue=pv,
                      effectsize=es,  # match es type to margin_type
                      crit_f=crit_f,
                      crit_es=crit_es,
                      reject=reject,
                      power_zero=pwr,
                      df=df,
                      f_stat=f_stat,
                      type_effectsize=type_effectsize
                      )
    return res
Exemplo n.º 16
0
def tost_poisson_2indep(count1,
                        exposure1,
                        count2,
                        exposure2,
                        low,
                        upp,
                        method='score'):
    '''Equivalence test based on two one-sided `test_proportions_2indep`

    This assumes that we have two independent binomial samples.

    The Null and alternative hypothesis for equivalence testing are

    - H0: g1 / g2 <= low or upp <= g1 / g2
    - H1: low < g1 / g2 < upp

    where g1 and g2 are the Poisson rates.

    Parameters
    ----------
    count1 : int
        Number of events in first sample
    exposure1 : float
        Total exposure (time * subjects) in first sample
    count2 : int
        Number of events in second sample
    exposure2 : float
        Total exposure (time * subjects) in second sample
    low, upp :
        equivalence margin for the ratio of Poisson rates
    method: string
        Method for the test statistic and the p-value. Defaults to `'score'`.
        Current Methods are based on Gu et. al 2008
        Implemented are 'wald', 'score' and 'sqrt' based asymptotic normal
        distribution, and the exact conditional test 'exact-cond', and its
        mid-point version 'cond-midp', see Notes

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    Notes
    -----
    - 'wald': method W1A, wald test, variance based on separate estimates
    - 'score': method W2A, score test, variance based on estimate under Null
    - 'wald-log': W3A  not implemented
    - 'score-log' W4A  not implemented
    - 'sqrt': W5A, based on variance stabilizing square root transformation
    - 'exact-cond': exact conditional test based on binomial distribution
    - 'cond-midp': midpoint-pvalue of exact conditional test

    The latter two are only verified for one-sided example.

    References
    ----------
    Gu, Ng, Tang, Schucany 2008: Testing the Ratio of Two Poisson Rates,
    Biometrical Journal 50 (2008) 2, 2008

    See Also
    --------
    test_poisson_2indep
    '''

    tt1 = test_poisson_2indep(count1,
                              exposure1,
                              count2,
                              exposure2,
                              ratio_null=low,
                              method=method,
                              alternative='larger')
    tt2 = test_poisson_2indep(count1,
                              exposure1,
                              count2,
                              exposure2,
                              ratio_null=upp,
                              method=method,
                              alternative='smaller')

    idx_max = 0 if tt1.pvalue < tt2.pvalue else 1
    res = HolderTuple(statistic=[tt1.statistic, tt2.statistic][idx_max],
                      pvalue=[tt1.pvalue, tt2.pvalue][idx_max],
                      method=method,
                      results_larger=tt1,
                      results_smaller=tt2,
                      title="Equivalence test for 2 independent Poisson rates")

    return res
Exemplo n.º 17
0
def anova_generic(means, vars_, nobs, use_var="unequal",
                  welch_correction=True, info=None):
    """oneway anova based on summary statistics

    incompletely verified

    """
    options = {"use_var": use_var,
               "welch_correction": welch_correction
               }
    if means.ndim != 1:
        raise ValueError('data (means, ...) has to be one-dimensional')
    nobs_t = nobs.sum()
    n_groups = len(means)
    # mean_t = (nobs * means).sum() / nobs_t
    if use_var == "unequal":
        weights = nobs / vars_
    else:
        weights = nobs

    w_total = weights.sum()
    w_rel = weights / w_total
    # meanw_t = (weights * means).sum() / w_total
    meanw_t = w_rel @ means

    statistic = np.dot(weights, (means - meanw_t)**2) / (n_groups - 1.)
    df_num = n_groups - 1.

    if use_var == "unequal":
        tmp = ((1 - w_rel)**2 / (nobs - 1)).sum() / (n_groups**2 - 1)
        if welch_correction:
            statistic /= 1 + 2 * (n_groups - 2) * tmp
        df_denom = 1. / (3. * tmp)

    elif use_var == "equal":
        # variance of group demeaned total sample, pooled var_resid
        tmp = ((nobs - 1) * vars_).sum() / (nobs_t - n_groups)
        statistic /= tmp
        df_denom = nobs_t - n_groups
    elif use_var == "bf":
        tmp = ((1. - nobs / nobs_t) * vars_).sum()
        statistic = 1. * (nobs * (means - meanw_t)**2).sum()
        statistic /= tmp

        df_num2 = n_groups - 1
        df_denom = tmp**2 / ((1. - nobs / nobs_t)**2 *
                             vars_**2 / (nobs - 1)).sum()
        df_num = tmp**2 / ((vars_**2).sum() +
                           (nobs / nobs_t * vars_).sum()**2 -
                           2 * (nobs / nobs_t * vars_**2).sum())
        pval2 = stats.f.sf(statistic, df_num2, df_denom)
        options["df2"] = (df_num2, df_denom)
        options["df_num2"] = df_num2
        options["pvalue2"] = pval2
    else:
        raise ValueError('use_var is to be one of "unequal", "equal" or "bf"')

    pval = stats.f.sf(statistic, df_num, df_denom)
    res = HolderTuple(statistic=statistic,
                      pvalue=pval,
                      df=(df_num, df_denom),
                      df_num=df_num,
                      df_denom=df_denom,
                      nobs_t=nobs_t,
                      n_groups=n_groups,
                      means=means,
                      nobs=nobs,
                      vars_=vars_,
                      **options
                      )
    return res
Exemplo n.º 18
0
def test_poisson_2indep(count1,
                        exposure1,
                        count2,
                        exposure2,
                        ratio_null=1,
                        method='score',
                        alternative='two-sided',
                        etest_kwds=None):
    '''test for ratio of two sample Poisson intensities

    If the two Poisson rates are g1 and g2, then the Null hypothesis is

    - H0: g1 / g2 = ratio_null

    against one of the following alternatives

    - H1_2-sided: g1 / g2 != ratio_null
    - H1_larger: g1 / g2 > ratio_null
    - H1_smaller: g1 / g2 < ratio_null

    Parameters
    ----------
    count1 : int
        Number of events in first sample.
    exposure1 : float
        Total exposure (time * subjects) in first sample.
    count2 : int
        Number of events in second sample.
    exposure2 : float
        Total exposure (time * subjects) in second sample.
    ratio: float
        ratio of the two Poisson rates under the Null hypothesis. Default is 1.
    method : string
        Method for the test statistic and the p-value. Defaults to `'score'`.
        Current Methods are based on Gu et. al 2008.
        Implemented are 'wald', 'score' and 'sqrt' based asymptotic normal
        distribution, and the exact conditional test 'exact-cond', and its
        mid-point version 'cond-midp'. method='etest' and method='etest-wald'
        provide pvalues from `etest_poisson_2indep` using score or wald
        statistic respectively.
        see Notes.
    alternative : string
        The alternative hypothesis, H1, has to be one of the following

        - 'two-sided': H1: ratio of rates is not equal to ratio_null (default)
        - 'larger' :   H1: ratio of rates is larger than ratio_null
        - 'smaller' :  H1: ratio of rates is smaller than ratio_null
    etest_kwds: dictionary
        Additional parameters to be passed to the etest_poisson_2indep
        function, namely ygrid.

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    Notes
    -----
    - 'wald': method W1A, wald test, variance based on separate estimates
    - 'score': method W2A, score test, variance based on estimate under Null
    - 'wald-log': W3A
    - 'score-log' W4A
    - 'sqrt': W5A, based on variance stabilizing square root transformation
    - 'exact-cond': exact conditional test based on binomial distribution
    - 'cond-midp': midpoint-pvalue of exact conditional test
    - 'etest': etest with score test statistic
    - 'etest-wald': etest with wald test statistic

    References
    ----------
    Gu, Ng, Tang, Schucany 2008: Testing the Ratio of Two Poisson Rates,
    Biometrical Journal 50 (2008) 2, 2008

    See Also
    --------
    tost_poisson_2indep
    etest_poisson_2indep
    '''

    # shortcut names
    y1, n1, y2, n2 = count1, exposure1, count2, exposure2
    d = n2 / n1
    r = ratio_null
    r_d = r / d

    if method in ['score']:
        stat = (y1 - y2 * r_d) / np.sqrt((y1 + y2) * r_d)
        dist = 'normal'
    elif method in ['wald']:
        stat = (y1 - y2 * r_d) / np.sqrt(y1 + y2 * r_d**2)
        dist = 'normal'
    elif method in ['sqrt']:
        stat = 2 * (np.sqrt(y1 + 3 / 8.) - np.sqrt((y2 + 3 / 8.) * r_d))
        stat /= np.sqrt(1 + r_d)
        dist = 'normal'
    elif method in ['exact-cond', 'cond-midp']:
        from statsmodels.stats import proportion
        bp = r_d / (1 + r_d)
        y_total = y1 + y2
        stat = None
        # TODO: why y2 in here and not y1, check definition of H1 "larger"
        pvalue = proportion.binom_test(y1,
                                       y_total,
                                       prop=bp,
                                       alternative=alternative)
        if method in ['cond-midp']:
            # not inplace in case we still want binom pvalue
            pvalue = pvalue - 0.5 * stats.binom.pmf(y1, y_total, bp)

        dist = 'binomial'
    elif method.startswith('etest'):
        if method.endswith('wald'):
            method_etest = 'wald'
        else:
            method_etest = 'score'
        if etest_kwds is None:
            etest_kwds = {}

        stat, pvalue = etest_poisson_2indep(count1,
                                            exposure1,
                                            count2,
                                            exposure2,
                                            ratio_null=ratio_null,
                                            method=method_etest,
                                            alternative=alternative,
                                            **etest_kwds)

        dist = 'poisson'
    else:
        raise ValueError('method not recognized')

    if dist == 'normal':
        stat, pvalue = _zstat_generic2(stat, 1, alternative)

    rates = (y1 / n1, y2 / n2)
    ratio = rates[0] / rates[1]
    res = HolderTuple(statistic=stat,
                      pvalue=pvalue,
                      distribution=dist,
                      method=method,
                      alternative=alternative,
                      rates=rates,
                      ratio=ratio,
                      ratio_null=ratio_null)
    return res
Exemplo n.º 19
0
def test_chisquare_binning(counts,
                           expected,
                           sort_var=None,
                           bins=10,
                           df=None,
                           ordered=False,
                           sort_method="quicksort",
                           alpha_nc=0.05):
    """chisquare gof test with binning of data, Hosmer-Lemeshow type

    ``observed`` and ``expected`` are observation specific and should have
    observations in rows and choices in columns

    Parameters
    ----------
    counts : array_like
        Observed frequency, i.e. counts for all choices
    expected : array_like
        Expected counts or probability. If expected are counts, then they
        need to sum to the same total count as the sum of oberserved.
        If those sums are unequal and all expected values are smaller or equal
        to 1, then they are interpreted as probabilites and will be rescaled
        to match counts.
    sort_var : array_like
        1-dimensional array for binning. Groups will be formed according to
        quantiles of the sorted array ``sort_var``, so that group sizes have
        equal or approximately equal sizes.

    Returns
    -------
    Holdertuple instance
        This instance contains the results of the chisquare test and some
        information about the data

        - statistic : chisquare statistic of the goodness-of-fit test
        - pvalue : pvalue of the chisquare test
        = df : degrees of freedom of the test

    Notes
    -----
    Degrees of freedom for Hosmer-Lemeshow tests are given by

    g groups, c choices

    - binary: `df = (g - 2)` for insample,
         Stata uses `df = g` for outsample
    - multinomial: `df = (g−2) *(c−1)`, reduces to (g-2) for binary c=2,
         (Fagerland, Hosmer, Bofin SIM 2008)
    - ordinal: `df = (g - 2) * (c - 1) + (c - 2)`, reduces to (g-2) for c=2,
         (Hosmer, ... ?)

    Note: If there are ties in the ``sort_var`` array, then the split of
    observations into groups will depend on the sort algorithm.
    """

    observed = np.asarray(counts)
    expected = np.asarray(expected)
    n_observed = counts.sum()
    n_expected = expected.sum()
    if not np.allclose(n_observed, n_expected, atol=1e-13):
        if np.max(expected) < 1 + 1e-13:
            # expected seems to be probability, warn and rescale
            import warnings
            warnings.warn("sum of expected and of observed differ, "
                          "rescaling ``expected``")
            expected = expected / n_expected * n_observed
        else:
            # expected doesn't look like fractions or probabilities
            raise ValueError("total counts of expected and observed differ")

    # k = 1 if observed.ndim == 1 else observed.shape[1]
    if sort_var is not None:
        argsort = np.argsort(sort_var, kind=sort_method)
    else:
        argsort = np.arange(observed.shape[0])
    # indices = [arr for arr in np.array_split(argsort, bins, axis=0)]
    indices = np.array_split(argsort, bins, axis=0)
    # in one loop, observed expected in last dimension, too messy,
    # freqs_probs = np.array([np.vstack([observed[idx].mean(0),
    #                                    expected[idx].mean(0)]).T
    #                         for idx in indices])
    freqs = np.array([observed[idx].sum(0) for idx in indices])
    probs = np.array([expected[idx].sum(0) for idx in indices])

    # chisquare test
    resid_pearson = (freqs - probs) / np.sqrt(probs)
    chi2_stat_groups = ((freqs - probs)**2 / probs).sum(1)
    chi2_stat = chi2_stat_groups.sum()
    if df is None:
        g, c = freqs.shape
        if ordered is True:
            df = (g - 2) * (c - 1) + (c - 2)
        else:
            df = (g - 2) * (c - 1)
    pvalue = stats.chi2.sf(chi2_stat, df)
    noncentrality = _noncentrality_chisquare(chi2_stat, df, alpha=alpha_nc)

    res = HolderTuple(statistic=chi2_stat,
                      pvalue=pvalue,
                      df=df,
                      freqs=freqs,
                      probs=probs,
                      noncentrality=noncentrality,
                      resid_pearson=resid_pearson,
                      chi2_stat_groups=chi2_stat_groups,
                      indices=indices)
    return res
Exemplo n.º 20
0
def test_poisson_zeroinflation_jh(results_poisson, exog_infl=None):
    """score test for zero inflation or deflation in Poisson

    This implements Jansakul and Hinde 2009 score test
    for excess zeros against a zero modified Poisson
    alternative. They use a linear link function for the
    inflation model to allow for zero deflation.

    Parameters
    ----------
    results_poisson: results instance
        The test is only valid if the results instance is a Poisson
        model.
    exog_infl : ndarray
        Explanatory variables for the zero inflated or zero modified
        alternative. I exog_infl is None, then the inflation
        probability is assumed to be constant.

    Returns
    -------
    score test results based on chisquare distribution

    Notes
    -----
    This is a score test based on the null hypothesis that
    the true model is Poisson. It will also reject for
    other deviations from a Poisson model if those affect
    the zero probabilities, e.g. in the direction of
    excess dispersion as in the Negative Binomial
    or Generalized Poisson model.
    Therefore, rejection in this test does not imply that
    zero-inflated Poisson is the appropriate model.

    Status: experimental, no verified unit tests,

    TODO: If the zero modification probability is assumed
    to be constant under the alternative, then we only have
    a scalar test score and we can use one-sided tests to
    distinguish zero inflation and deflation from the
    two-sided deviations. (The general one-sided case is
    difficult.)
    In this case the test specializes to the test by Broek

    References
    ----------
    .. [1] Jansakul, N., and J. P. Hinde. 2002. “Score Tests for Zero-Inflated
           Poisson Models.” Computational Statistics & Data Analysis 40 (1):
           75–96. https://doi.org/10.1016/S0167-9473(01)00104-9.
    """
    if not isinstance(results_poisson.model, Poisson):
        # GLM Poisson would be also valid, not tried
        import warnings
        warnings.warn('Test is only valid if model is Poisson')

    nobs = results_poisson.model.endog.shape[0]

    if exog_infl is None:
        exog_infl = np.ones((nobs, 1))

    endog = results_poisson.model.endog
    exog = results_poisson.model.exog

    mu = results_poisson.predict()
    prob_zero = np.exp(-mu)

    cov_poi = results_poisson.cov_params()
    cross_derivative = (exog_infl.T * (-mu)).dot(exog).T
    cov_infl = (exog_infl.T * ((1 - prob_zero) / prob_zero)).dot(exog_infl)
    score_obs_infl = exog_infl * ((
        (endog == 0) - prob_zero) / prob_zero)[:, None]
    #score_obs_infl = exog_infl * ((endog == 0) * (1 - prob_zero) / prob_zero - (endog>0))[:,None] #same
    score_infl = score_obs_infl.sum(0)
    cov_score_infl = cov_infl - cross_derivative.T.dot(cov_poi).dot(
        cross_derivative)
    cov_score_infl_inv = np.linalg.pinv(cov_score_infl)

    statistic = score_infl.dot(cov_score_infl_inv).dot(score_infl)
    df2 = np.linalg.matrix_rank(
        cov_score_infl)  # more general, maybe not needed
    df = exog_infl.shape[1]
    pvalue = stats.chi2.sf(statistic, df)

    res = HolderTuple(
        statistic=statistic,
        pvalue=pvalue,
        df=df,
        rank_score=df2,
        distribution="chi2",
    )
    return res
Exemplo n.º 21
0
def test_chisquare_prob(results, probs, bin_edges=None, method=None):
    """
    chisquare test for predicted probabilities using cmt-opg

    Parameters
    ----------
    results : results instance
        Instance of a count regression results
    probs : ndarray
        Array of predicted probabilities with observations
        in rows and event counts in columns
    bin_edges : None or array
        intervals to combine several counts into cells
        see combine_bins

    Returns
    -------
    (api not stable, replace by test-results class)
    statistic : float
        chisquare statistic for tes
    p-value : float
        p-value of test
    df : int
        degrees of freedom for chisquare distribution
    extras : ???
        currently returns a tuple with some intermediate results
        (diff, res_aux)

    Notes
    -----

    Status : experimental, no verified unit tests, needs to be generalized
    currently only OPG version with auxiliary regression is implemented

    Assumes counts are np.arange(probs.shape[1]), i.e. consecutive
    integers starting at zero.

    Auxiliary regression drops the last column of binned probs to avoid
    that probabilities sum to 1.

    References
    ----------
    .. [1] Andrews, Donald W. K. 1988a. “Chi-Square Diagnostic Tests for
           Econometric Models: Theory.” Econometrica 56 (6): 1419–53.
           https://doi.org/10.2307/1913105.

    .. [2] Andrews, Donald W. K. 1988b. “Chi-Square Diagnostic Tests for
           Econometric Models.” Journal of Econometrics 37 (1): 135–56.
           https://doi.org/10.1016/0304-4076(88)90079-6.

    .. [3] Manjón, M., and O. Martínez. 2014. “The Chi-Squared Goodness-of-Fit
           Test for Count-Data Models.” Stata Journal 14 (4): 798–816.
    """
    res = results
    score_obs = results.model.score_obs(results.params)
    d_ind = (res.model.endog[:, None] == np.arange(probs.shape[1])).astype(int)
    if bin_edges is not None:
        d_ind_bins, k_bins = _combine_bins(bin_edges, d_ind)
        probs_bins, k_bins = _combine_bins(bin_edges, probs)
        k_bins = probs_bins.shape[-1]
    else:
        d_ind_bins, k_bins = d_ind, d_ind.shape[1]
        probs_bins = probs
    diff1 = d_ind_bins - probs_bins
    # diff2 = (1 - d_ind.sum(1)) - (1 - probs_bins.sum(1))
    x_aux = np.column_stack((score_obs, diff1[:, :-1]))  # diff2))
    nobs = x_aux.shape[0]
    res_aux = OLS(np.ones(nobs), x_aux).fit()

    chi2_stat = nobs * (1 - res_aux.ssr / res_aux.uncentered_tss)
    df = res_aux.model.rank - score_obs.shape[1]
    if df < k_bins - 1:
        # not a problem in general, but it can be for OPG version
        import warnings
        # TODO: Warning shows up in Monte Carlo loop, skip for now
        warnings.warn('auxiliary model is rank deficient')

    statistic = chi2_stat
    pvalue = stats.chi2.sf(chi2_stat, df)

    res = HolderTuple(
        statistic=statistic,
        pvalue=pvalue,
        df=df,
        diff1=diff1,
        res_aux=res_aux,
        distribution="chi2",
    )
    return res
Exemplo n.º 22
0
def anova_generic(means, vars_, nobs, use_var="unequal",
                  welch_correction=True, info=None):
    """Oneway anova based on summary statistics

    Parameters
    ----------
    means: array_like
        Mean of samples to be compared
    vars_ : float or array_like
        Residual (within) variance of each sample or pooled
        If var_ is scalar, then it is interpreted as pooled variance that is
        the same for all samples, ``use_var`` will be ignored.
        Otherwise, the variances are used depending on the ``use_var`` keyword.
    nobs : int or array_like
        Number of observations for the samples.
        If nobs is scalar, then it is assumed that all samples have the same
        number ``nobs`` of observation, i.e. a balanced sample case.
        Otherwise, statistics will be weighted corresponding to nobs.
        Only relative sizes are relevant, any proportional change to nobs does
        not change the effect size.
    use_var : {"unequal", "equal", "bf"}
        If ``use_var`` is "unequal", then the variances can differe across
        samples and the effect size for Welch anova will be computed.
    welch

    Returns
    -------
    f2 : float
        Effect size corresponding to squared Cohen's f, which is also equal
        to the noncentrality divided by total number of observations.
        In contrast to other functions, this value is not squared.

    """
    options = {"use_var": use_var,
               "welch_correction": welch_correction
               }
    if means.ndim != 1:
        raise ValueError('data (means, ...) has to be one-dimensional')
    nobs_t = nobs.sum()
    n_groups = len(means)
    # mean_t = (nobs * means).sum() / nobs_t
    if use_var == "unequal":
        weights = nobs / vars_
    else:
        weights = nobs

    w_total = weights.sum()
    w_rel = weights / w_total
    # meanw_t = (weights * means).sum() / w_total
    meanw_t = w_rel @ means

    statistic = np.dot(weights, (means - meanw_t)**2) / (n_groups - 1.)
    df_num = n_groups - 1.

    if use_var == "unequal":
        tmp = ((1 - w_rel)**2 / (nobs - 1)).sum() / (n_groups**2 - 1)
        if welch_correction:
            statistic /= 1 + 2 * (n_groups - 2) * tmp
        df_denom = 1. / (3. * tmp)

    elif use_var == "equal":
        # variance of group demeaned total sample, pooled var_resid
        tmp = ((nobs - 1) * vars_).sum() / (nobs_t - n_groups)
        statistic /= tmp
        df_denom = nobs_t - n_groups
    elif use_var == "bf":
        tmp = ((1. - nobs / nobs_t) * vars_).sum()
        statistic = 1. * (nobs * (means - meanw_t)**2).sum()
        statistic /= tmp

        df_num2 = n_groups - 1
        df_denom = tmp**2 / ((1. - nobs / nobs_t)**2 *
                             vars_**2 / (nobs - 1)).sum()
        df_num = tmp**2 / ((vars_**2).sum() +
                           (nobs / nobs_t * vars_).sum()**2 -
                           2 * (nobs / nobs_t * vars_**2).sum())
        pval2 = stats.f.sf(statistic, df_num2, df_denom)
        options["df2"] = (df_num2, df_denom)
        options["df_num2"] = df_num2
        options["pvalue2"] = pval2
    else:
        raise ValueError('use_var is to be one of "unequal", "equal" or "bf"')

    pval = stats.f.sf(statistic, df_num, df_denom)
    res = HolderTuple(statistic=statistic,
                      pvalue=pval,
                      df=(df_num, df_denom),
                      df_num=df_num,
                      df_denom=df_denom,
                      nobs_t=nobs_t,
                      n_groups=n_groups,
                      means=means,
                      nobs=nobs,
                      vars_=vars_,
                      **options
                      )
    return res
Exemplo n.º 23
0
def cov_test_oneway(cov_list, nobs_list):
    r"""Multiple sample hypothesis test that covariance matrices are equal.

    This is commonly known as Box-M test

    The Null and alternative hypotheses are

      $H0 : \Sigma_i = \Sigma_j  for all i and j \\
      H1 : \Sigma_i \neq diag(\Sigma_j) for at least one i and j$

    where $\Sigma_i$ is the covariance of sample $i$.

    Parameters
    ----------
    cov_list : list of array_like
        Covariance matrices of the sample, estimated with denominator
        ``(N - 1)``, i.e. `ddof=1`.
    nobs_list : list
        List of the number of observations used in the estimation of the
        covariance for each sample.

    Returns
    -------
    res : instance of HolderTuple
        Results contains test statistic and pvalues for both chisquare and F
        distribution based tests, identified by the name ending "_chi2" and
        "_f".
        Attributes ``statistic, pvalue`` refer to the F-test version.

    Notes
    -----
    approximations to distribution of test statistic is by Box

    References
    ----------
    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """
    # Note stata uses nobs in cov, this uses nobs - 1
    cov_list = list(map(np.asarray, cov_list))
    m = len(cov_list)
    nobs = sum(nobs_list)  # total number of observations
    k = cov_list[0].shape[0]

    cov_pooled = sum((n - 1) * c for (n, c) in zip(nobs_list, cov_list))
    cov_pooled /= (nobs - m)
    stat0 = (nobs - m) * logdet(cov_pooled)
    stat0 -= sum((n - 1) * logdet(c) for (n, c) in zip(nobs_list, cov_list))

    # Box's chi2
    c1 = sum(1 / (n - 1) for n in nobs_list) - 1 / (nobs - m)
    c1 *= (2 * k*k + 3 * k - 1) / (6 * (k + 1) * (m - 1))
    df_chi2 = (m - 1) * k * (k + 1) / 2
    statistic_chi2 = (1 - c1) * stat0
    pvalue_chi2 = stats.chi2.sf(statistic_chi2, df_chi2)

    c2 = sum(1 / (n - 1)**2 for n in nobs_list) - 1 / (nobs - m)**2
    c2 *= (k - 1) * (k + 2) / (6 * (m - 1))
    a1 = df_chi2
    a2 = (a1 + 2) / abs(c2 - c1**2)
    b1 = (1 - c1 - a1 / a2) / a1
    b2 = (1 - c1 + 2 / a2) / a2
    if c2 > c1**2:
        statistic_f = b1 * stat0
    else:
        tmp = b2 * stat0
        statistic_f = a2 / a1 * tmp / (1 + tmp)
        print("in branch 2")
    df_f = (a1, a2)
    pvalue_f = stats.f.sf(statistic_f, *df_f)
    return HolderTuple(statistic=statistic_f,  # name convention, using F here
                       pvalue=pvalue_f,   # name convention, using F here
                       statistic_base=stat0,
                       statistic_chi2=statistic_chi2,
                       pvalue_chi2=pvalue_chi2,
                       df_chi2=df_chi2,
                       distr_chi2='chi2',
                       statistic_f=statistic_f,
                       pvalue_f=pvalue_f,
                       df_f=df_f,
                       distr_f='F')
Exemplo n.º 24
0
def equivalence_oneway_generic(f_stat, n_groups, nobs, equiv_margin, df,
                               alpha=0.05, margin_type="f2"):
    """Equivalence test for oneway anova (Wellek and extensions)

    This is an helper function when summary statistics are available.
    Use `equivalence_oneway` instead.

    The null hypothesis is that the means differ by more than `equiv_margin`
    in the anova distance measure.
    If the Null is rejected, then the data supports that means are equivalent,
    i.e. within a given distance.

    Parameters
    ----------
    f_stat : float
        F-statistic
    n_groups : int
        Number of groups in oneway comparison.
    nobs : ndarray
        Array of number of observations in groups.
    equiv_margin : float
        Equivalence margin in terms of effect size. Effect size can be chosen
        with `margin_type`. default is squared Cohen's f.
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where

        - df1 : numerator degrees of freedom, number of constraints
        - df2 : denominator degrees of freedom, df_resid

    alpha : float in (0, 1)
        Significance level for the hypothesis test.
    margin_type : "f2" or "wellek"
        Type of effect size used for equivalence margin.

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    Notes
    -----
    Equivalence in this function is defined in terms of a squared distance
    measure similar to Mahalanobis distance.
    Alternative definitions for the oneway case are based on maximum difference
    between pairs of means or similar pairwise distances.

    The equivalence margin is used for the noncentrality parameter in the
    noncentral F distribution for the test statistic. In samples with unequal
    variances estimated using Welch or Brown-Forsythe Anova, the f-statistic
    depends on the unequal variances and corrections to the test statistic.
    This means that the equivalence margins are not fully comparable across
    methods for treating unequal variances.

    References
    ----------
    Wellek, Stefan. 2010. Testing Statistical Hypotheses of Equivalence and
    Noninferiority. 2nd ed. Boca Raton: CRC Press.

    Cribbie, Robert A., Chantal A. Arpin-Cribbie, and Jamie A. Gruman. 2009.
    “Tests of Equivalence for One-Way Independent Groups Designs.” The Journal
    of Experimental Education 78 (1): 1–13.
    https://doi.org/10.1080/00220970903224552.

    Jan, Show-Li, and Gwowen Shieh. 2019. “On the Extended Welch Test for
    Assessing Equivalence of Standardized Means.” Statistics in
    Biopharmaceutical Research 0 (0): 1–8.
    https://doi.org/10.1080/19466315.2019.1654915.

    """
    nobs_t = nobs.sum()
    nobs_mean = nobs_t / n_groups

    if margin_type == "wellek":
        nc_null = nobs_mean * equiv_margin**2
        es = f_stat * (n_groups - 1) / nobs_mean
        type_effectsize = "Wellek's psi_squared"
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        nc_null = nobs_t * equiv_margin
        es = f_stat / nobs_t
        type_effectsize = "Cohen's f_squared"
    else:
        raise ValueError('`margin_type` should be "f2" or "wellek"')
    crit_f = stats.ncf.ppf(alpha, df[0], df[1], nc_null)

    if margin_type == "wellek":
        # TODO: do we need a sqrt
        crit_es = crit_f * (n_groups - 1) / nobs_mean
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        crit_es = crit_f / nobs_t

    reject = (es < crit_es)

    pv = stats.ncf.cdf(f_stat, df[0], df[1], nc_null)
    pwr = stats.ncf.cdf(crit_f, df[0], df[1], 1e-13)  # scipy, cannot be 0
    res = HolderTuple(statistic=f_stat,
                      pvalue=pv,
                      effectsize=es,  # match es type to margin_type
                      crit_f=crit_f,
                      crit_es=crit_es,
                      reject=reject,
                      power_zero=pwr,
                      df=df,
                      f_stat=f_stat,
                      type_effectsize=type_effectsize
                      )
    return res