Example #1
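The snippets below are module-level functions excerpted from Pingouin's
nonparametric module; the module-level imports they rely on are not shown. A
minimal sketch of those imports, assuming the helpers ``remove_na`` and
``_postprocess_dataframe`` come from ``pingouin.utils`` as in recent Pingouin
releases:

import numpy as np
import pandas as pd
import scipy.stats
# Pingouin helper functions used by the snippets (assumed location).
from pingouin.utils import remove_na, _postprocess_dataframe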
def mwu(x, y, tail='two-sided'):
    """Mann-Whitney U Test (= Wilcoxon rank-sum test). It is the non-parametric
    version of the independent T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` must be
        independent.
    tail : string
        Specify whether to return `'one-sided'` or `'two-sided'` p-value.
        Can also be `'greater'` or `'less'` to specify the direction of the
        test. If ``tail='one-sided'``, the alternative of the test will be
        automatically detected by comparing the medians of ``x`` and ``y``.
        For instance, if median(``x``) < median(``y``) and
        ``tail='one-sided'``, Pingouin will automatically set ``tail='less'``,
        and vice versa.

    Returns
    -------
    stats : pandas DataFrame
        Test summary ::

        'U-val' : U-value
        'p-val' : p-value
        'RBC'   : rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    See also
    --------
    scipy.stats.mannwhitneyu, wilcoxon, ttest

    Notes
    -----
    The Mann–Whitney U test (also called Wilcoxon rank-sum test) is a
    non-parametric test of the null hypothesis that it is equally likely that
    a randomly selected value from one sample will be less than or greater
    than a randomly selected value from a second sample. The test assumes
    that the two samples are independent. This test corrects for ties and by
    default uses a continuity correction
    (see :py:func:`scipy.stats.mannwhitneyu` for details).

    The rank biserial correlation effect size is the difference between the
    proportion of favorable evidence and the proportion of unfavorable
    evidence (see Kerby 2014).

    The common language effect size is the probability (from 0 to 1) that a
    randomly selected observation from the first sample will be greater than a
    randomly selected observation from the second sample.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
           two random variables is stochastically larger than the other.
           The annals of mathematical statistics, 50-60.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    Examples
    --------
    >>> import numpy as np
    >>> import pingouin as pg
    >>> np.random.seed(123)
    >>> x = np.random.uniform(low=0, high=1, size=20)
    >>> y = np.random.uniform(low=0.2, high=1.2, size=20)
    >>> pg.mwu(x, y, tail='two-sided')
         U-val       tail    p-val    RBC   CLES
    MWU   97.0  two-sided  0.00556  0.515  0.758

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True,
    ...                          alternative='two-sided')
    MannwhitneyuResult(statistic=97.0, pvalue=0.0055604599321374135)

    One-sided tail: one can either manually specify the alternative hypothesis

    >>> pg.mwu(x, y, tail='greater')
         U-val     tail     p-val    RBC   CLES
    MWU   97.0  greater  0.997442  0.515  0.758

    >>> pg.mwu(x, y, tail='less')
         U-val  tail    p-val    RBC   CLES
    MWU   97.0  less  0.00278  0.515  0.758

    Or simply leave it to Pingouin, using the `'one-sided'` argument, in which
    case Pingouin will compare the medians of ``x`` and ``y`` and select the
    most appropriate tail based on that:

    >>> # Since np.median(x) < np.median(y), this is equivalent to tail='less'
    >>> pg.mwu(x, y, tail='one-sided')
         U-val  tail    p-val    RBC   CLES
    MWU   97.0  less  0.00278  0.515  0.758
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=False)

    # Check tails
    possible_tails = ['two-sided', 'one-sided', 'greater', 'less']
    assert tail in possible_tails, 'Invalid tail argument.'
    if tail == 'one-sided':
        # Detect the direction of the test based on the median
        tail = 'less' if np.median(x) < np.median(y) else 'greater'

    uval, pval = scipy.stats.mannwhitneyu(x,
                                          y,
                                          use_continuity=True,
                                          alternative=tail)

    # Effect size 1: common language effect size (McGraw and Wong 1992)
    diff = x[:, None] - y
    cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size

    # Effect size 2: rank biserial correlation (Wendt 1972)
    rbc = 1 - (2 * uval) / diff.size  # diff.size = x.size * y.size

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['MWU'])
    stats['U-val'] = round(uval, 3)
    stats['tail'] = tail
    stats['p-val'] = pval
    stats['RBC'] = round(rbc, 3)
    stats['CLES'] = round(cles, 3)

    col_order = ['U-val', 'tail', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
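A quick numerical check of the rank-biserial identity used above: with no
ties, ``1 - 2 * U / (n1 * n2)`` equals the Kerby simple difference between the
proportion of pairs where ``x < y`` and the proportion where ``x > y``. This
is a standalone sketch (not part of Pingouin), reusing the docstring data:

import numpy as np
import scipy.stats

np.random.seed(123)
x = np.random.uniform(low=0, high=1, size=20)
y = np.random.uniform(low=0.2, high=1.2, size=20)

uval, _ = scipy.stats.mannwhitneyu(x, y, use_continuity=True,
                                   alternative='two-sided')
diff = x[:, None] - y             # all n1 * n2 pairwise differences
prop_less = (diff < 0).mean()     # proportion of pairs where x < y
prop_greater = (diff > 0).mean()  # proportion of pairs where x > y
print(1 - 2 * uval / diff.size)   # 0.515
print(prop_less - prop_greater)   # 0.515 (same value, no ties here)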
Example #2
def wilcoxon(x, y, tail='two-sided'):
    """Wilcoxon signed-rank test. It is the non-parametric version of the
    paired T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` must be
        related (e.g., repeated measures) and, therefore, have the same number
        of samples. Note that a listwise deletion of missing values
        is automatically applied.
    tail : string
        Specify whether to return `'one-sided'` or `'two-sided'` p-value.
        Can also be `'greater'` or `'less'` to specify the direction of the
        test. If ``tail='one-sided'``, the alternative of the test will be
        automatically detected by looking at the sign of the median of the
        differences between ``x`` and ``y``.
        For instance, if ``np.median(x - y) > 0`` and ``tail='one-sided'``,
        Pingouin will automatically set ``tail='greater'`` and vice versa.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'W-val'``: W-value
        * ``'p-val'``: p-value
        * ``'RBC'``: matched pairs rank-biserial correlation (effect size)
        * ``'CLES'``: common language effect size

    See also
    --------
    scipy.stats.wilcoxon, mwu

    Notes
    -----
    The Wilcoxon signed-rank test [1]_ tests the null hypothesis that two
    related paired samples come from the same distribution. In particular,
    it tests whether the distribution of the differences x - y is symmetric
    about zero. A continuity correction is applied by default
    (see :py:func:`scipy.stats.wilcoxon` for details).

    The matched pairs rank biserial correlation [2]_ is the simple difference
    between the proportion of favorable and unfavorable evidence; in the case
    of the Wilcoxon signed-rank test, the evidence consists of rank sums
    (Kerby 2014):

    .. math:: r = f - u

    The common language effect size is the proportion of pairs where ``x`` is
    higher than ``y``. It was first introduced by McGraw and Wong (1992) [3]_.
    Pingouin uses a brute-force version of the formula given by Vargha and
    Delaney 2000 [4]_:

    .. math:: \\text{CL} = P(X > Y) + .5 \\times P(X = Y)

    The advantages of this method are twofold. First, the brute-force
    approach pairs each observation of ``x`` to its ``y`` counterpart, and
    therefore does not require normally distributed data. Second, the formula
    takes ties into account and therefore works with ordinal data.

    When tail is ``'less'``, the CLES is then set to :math:`1 - \\text{CL}`,
    which gives the proportion of pairs where ``x`` is *lower* than ``y``.

    .. warning :: Versions of Pingouin below 0.2.6 gave wrong two-sided
        p-values for the Wilcoxon test. P-values were accidentally squared, and
        therefore smaller. This issue has been resolved in Pingouin>=0.2.6.
        Make sure to always use the latest release.

    References
    ----------
    .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.
           Biometrics bulletin, 1(6), 80-83.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    .. [4] Vargha, A., & Delaney, H. D. (2000). A Critique and Improvement of
           the “CL” Common Language Effect Size Statistics of McGraw and Wong.
           Journal of Educational and Behavioral Statistics: A Quarterly
           Publication Sponsored by the American Educational Research
           Association and the American Statistical Association, 25(2),
           101–132. https://doi.org/10.2307/1165329

    Examples
    --------
    Wilcoxon test on two related samples.

    >>> import numpy as np
    >>> import pingouin as pg
    >>> x = [20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]
    >>> y = [38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]
    >>> pg.wilcoxon(x, y, tail='two-sided')
              W-val       tail     p-val       RBC      CLES
    Wilcoxon   20.5  two-sided  0.285765 -0.378788  0.395833

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.wilcoxon(x, y, correction=True)
    WilcoxonResult(statistic=20.5, pvalue=0.2857652190231508)

    One-sided tail: one can either manually specify the alternative hypothesis

    >>> pg.wilcoxon(x, y, tail='greater')
              W-val     tail     p-val       RBC      CLES
    Wilcoxon   20.5  greater  0.876244 -0.378788  0.395833

    >>> pg.wilcoxon(x, y, tail='less')
              W-val  tail     p-val       RBC      CLES
    Wilcoxon   20.5  less  0.142883 -0.378788  0.604167

    Or simply leave it to Pingouin, using the `'one-sided'` argument, in which
    case Pingouin will look at the sign of the median of the differences
    between ``x`` and ``y`` and adjust the tail based on that:

    >>> np.median(np.array(x) - np.array(y))
    -1.5

    The median is negative, so Pingouin will test for the alternative
    hypothesis that the median of the differences is negative (= less than 0).

    >>> pg.wilcoxon(x, y, tail='one-sided')  # Equivalent to tail = 'less'
              W-val  tail     p-val       RBC      CLES
    Wilcoxon   20.5  less  0.142883 -0.378788  0.604167
    """
    x = np.asarray(x)
    y = np.asarray(y)
    x, y = remove_na(x, y, paired=True)  # Remove NA

    # Check tails
    possible_tails = ['two-sided', 'one-sided', 'greater', 'less']
    assert tail in possible_tails, 'Invalid tail argument.'
    if tail == 'one-sided':
        # Detect the direction of the test based on the median
        tail = 'less' if np.median(x - y) < 0 else 'greater'

    # Compute test
    wval, pval = scipy.stats.wilcoxon(x,
                                      y,
                                      zero_method='wilcox',
                                      correction=True,
                                      alternative=tail)

    # Effect size 1: Common Language Effect Size
    # Since Pingouin v0.3.5, CLES is tail-specific and calculated
    # according to the formula given in Vargha and Delaney 2000 which
    # works with ordinal data.
    diff = x[:, None] - y
    # cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size
    # Tail = 'greater', with ties set to 0.5
    # Note that tail = 'two-sided' gives same output as tail = 'greater'
    cles = np.where(diff == 0, 0.5, diff > 0).mean()
    cles = 1 - cles if tail == 'less' else cles

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014)
    d = x - y
    d = d[d != 0]
    r = scipy.stats.rankdata(abs(d))
    rsum = r.sum()
    r_plus = np.sum((d > 0) * r)
    r_minus = np.sum((d < 0) * r)
    rbc = r_plus / rsum - r_minus / rsum

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['Wilcoxon'])
    stats['W-val'] = wval
    stats['tail'] = tail
    stats['p-val'] = pval
    stats['RBC'] = rbc
    stats['CLES'] = cles

    col_order = ['W-val', 'tail', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
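The matched-pairs rank-biserial block above is the Kerby simple difference
formula ``r = f - u``, where ``f`` and ``u`` are the proportions of the total
rank sum carried by positive and negative differences. A standalone sketch
with the docstring data, reproducing the RBC reported above:

import numpy as np
import scipy.stats

x = np.array([20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13])
y = np.array([38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16])
d = x - y
d = d[d != 0]                            # discard zero differences
ranks = scipy.stats.rankdata(np.abs(d))  # rank the absolute differences
f = ranks[d > 0].sum() / ranks.sum()     # proportion of favorable rank sum
u = ranks[d < 0].sum() / ranks.sum()     # proportion of unfavorable rank sum
print(round(f - u, 6))                   # -0.378788, the RBC shown above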
Example #3
def wilcoxon(x, y, tail='two-sided'):
    """Wilcoxon signed-rank test. It is the non-parametric version of the
    paired T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` must be
        related (e.g., repeated measures) and, therefore, have the same number
        of samples. Note that a listwise deletion of missing values
        is automatically applied.
    tail : string
        Specify whether to return `'one-sided'` or `'two-sided'` p-value.
        Can also be `'greater'` or `'less'` to specify the direction of the
        test. If ``tail='one-sided'``, the alternative of the test will be
        automatically detected by looking at the sign of the median of the
        differences between ``x`` and ``y``.
        For instance, if ``np.median(x - y) > 0`` and ``tail='one-sided'``,
        Pingouin will automatically set ``tail='greater'`` and vice versa.

    Returns
    -------
    stats : pandas DataFrame
        Test summary ::

        'W-val' : W-value
        'p-val' : p-value
        'RBC'   : matched pairs rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    See also
    --------
    scipy.stats.wilcoxon, mwu

    Notes
    -----
    The Wilcoxon signed-rank test tests the null hypothesis that two related
    paired samples come from the same distribution. In particular, it tests
    whether the distribution of the differences x - y is symmetric about zero.
    A continuity correction is applied by default
    (see :py:func:`scipy.stats.wilcoxon` for details).

    The rank biserial correlation is the difference between the proportion of
    favorable evidence and the proportion of unfavorable evidence
    (see Kerby 2014).

    The common language effect size is the probability (from 0 to 1) that a
    randomly selected observation from the first sample will be greater than a
    randomly selected observation from the second sample.

    .. warning :: Versions of Pingouin below 0.2.6 gave wrong two-sided
        p-values for the Wilcoxon test. P-values were accidentally squared, and
        therefore smaller. This issue has been resolved in Pingouin>=0.2.6.
        Make sure to always use the latest release.

    References
    ----------
    .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.
           Biometrics bulletin, 1(6), 80-83.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    Examples
    --------
    Wilcoxon test on two related samples.

    >>> import numpy as np
    >>> import pingouin as pg
    >>> x = [20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]
    >>> y = [38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]
    >>> pg.wilcoxon(x, y, tail='two-sided')
              W-val       tail     p-val    RBC   CLES
    Wilcoxon   20.5  two-sided  0.285765 -0.379  0.583

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.wilcoxon(x, y, correction=True)
    WilcoxonResult(statistic=20.5, pvalue=0.2857652190231508)

    One-sided tail: one can either manually specify the alternative hypothesis

    >>> pg.wilcoxon(x, y, tail='greater')
              W-val     tail     p-val    RBC   CLES
    Wilcoxon   20.5  greater  0.876244 -0.379  0.583

    >>> pg.wilcoxon(x, y, tail='less')
              W-val  tail     p-val    RBC   CLES
    Wilcoxon   20.5  less  0.142883 -0.379  0.583

    Or simply leave it to Pingouin, using the `'one-sided'` argument, in which
    case Pingouin will look at the sign of the median of the differences
    between ``x`` and ``y`` and adjust the tail based on that:

    >>> np.median(np.array(x) - np.array(y))
    -1.5

    The median is negative, so Pingouin will test for the alternative
    hypothesis that the median of the differences is negative (= less than 0).

    >>> pg.wilcoxon(x, y, tail='one-sided')  # Equivalent to tail = 'less'
              W-val  tail     p-val    RBC   CLES
    Wilcoxon   20.5  less  0.142883 -0.379  0.583
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=True)

    # Check tails
    possible_tails = ['two-sided', 'one-sided', 'greater', 'less']
    assert tail in possible_tails, 'Invalid tail argument.'
    if tail == 'one-sided':
        # Detect the direction of the test based on the median
        tail = 'less' if np.median(x - y) < 0 else 'greater'

    # Compute test
    wval, pval = scipy.stats.wilcoxon(x, y, zero_method='wilcox',
                                      correction=True, alternative=tail)

    # Effect size 1: common language effect size (McGraw and Wong 1992)
    diff = x[:, None] - y
    cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014)
    d = x - y
    d = d[d != 0]
    r = scipy.stats.rankdata(abs(d))
    rsum = r.sum()
    r_plus = np.sum((d > 0) * r)
    r_minus = np.sum((d < 0) * r)
    rbc = r_plus / rsum - r_minus / rsum

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['Wilcoxon'])
    stats['W-val'] = round(wval, 3)
    stats['tail'] = tail
    stats['p-val'] = pval
    stats['RBC'] = round(rbc, 3)
    stats['CLES'] = round(cles, 3)

    col_order = ['W-val', 'tail', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
Example #4
def mwu(x, y, tail='two-sided'):
    """Mann-Whitney U Test (= Wilcoxon rank-sum test). It is the non-parametric
    version of the independent T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` must be
        independent.
    tail : string
        Specify whether to return `'one-sided'` or `'two-sided'` p-value.
        Can also be `'greater'` or `'less'` to specify the direction of the
        test. If ``tail='one-sided'``, the alternative of the test will be
        automatically detected by comparing the medians of ``x`` and ``y``.
        For instance, if median(``x``) < median(``y``) and
        ``tail='one-sided'``, Pingouin will automatically set ``tail='less'``,
        and vice versa.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'U-val'``: U-value
        * ``'p-val'``: p-value
        * ``'RBC'``: rank-biserial correlation
        * ``'CLES'``: common language effect size

    See also
    --------
    scipy.stats.mannwhitneyu, wilcoxon, ttest

    Notes
    -----
    The Mann–Whitney U test [1]_ (also called Wilcoxon rank-sum test) is a
    non-parametric test of the null hypothesis that it is equally likely that
    a randomly selected value from one sample will be less than or greater
    than a randomly selected value from a second sample. The test assumes
    that the two samples are independent. This test corrects for ties and by
    default uses a continuity correction
    (see :py:func:`scipy.stats.mannwhitneyu` for details).

    The rank biserial correlation [2]_ is the difference between the
    proportion of favorable evidence and the proportion of unfavorable
    evidence.

    The common language effect size is the proportion of pairs where ``x`` is
    higher than ``y``. It was first introduced by McGraw and Wong (1992) [3]_.
    Pingouin uses a brute-force version of the formula given by Vargha and
    Delaney 2000 [4]_:

    .. math:: \\text{CL} = P(X > Y) + .5 \\times P(X = Y)

    The advantages of this method are twofold. First, the brute-force
    approach pairs each observation of ``x`` to its ``y`` counterpart, and
    therefore does not require normally distributed data. Second, the formula
    takes ties into account and therefore works with ordinal data.

    When tail is ``'less'``, the CLES is then set to :math:`1 - \\text{CL}`,
    which gives the proportion of pairs where ``x`` is *lower* than ``y``.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
           two random variables is stochastically larger than the other.
           The annals of mathematical statistics, 50-60.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    .. [4] Vargha, A., & Delaney, H. D. (2000). A Critique and Improvement of
           the “CL” Common Language Effect Size Statistics of McGraw and Wong.
           Journal of Educational and Behavioral Statistics: A Quarterly
           Publication Sponsored by the American Educational Research
           Association and the American Statistical Association, 25(2),
           101–132. https://doi.org/10.2307/1165329

    Examples
    --------
    >>> import numpy as np
    >>> import pingouin as pg
    >>> np.random.seed(123)
    >>> x = np.random.uniform(low=0, high=1, size=20)
    >>> y = np.random.uniform(low=0.2, high=1.2, size=20)
    >>> pg.mwu(x, y, tail='two-sided')
         U-val       tail    p-val    RBC    CLES
    MWU   97.0  two-sided  0.00556  0.515  0.2425

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True,
    ...                          alternative='two-sided')
    MannwhitneyuResult(statistic=97.0, pvalue=0.0055604599321374135)

    One-sided tail: one can either manually specify the alternative hypothesis

    >>> pg.mwu(x, y, tail='greater')
         U-val     tail     p-val    RBC    CLES
    MWU   97.0  greater  0.997442  0.515  0.2425

    >>> pg.mwu(x, y, tail='less')
         U-val  tail    p-val    RBC    CLES
    MWU   97.0  less  0.00278  0.515  0.7575

    Or simply leave it to Pingouin, using the `'one-sided'` argument, in which
    case Pingouin will compare the medians of ``x`` and ``y`` and select the
    most appropriate tail based on that:

    >>> # Since np.median(x) < np.median(y), this is equivalent to tail='less'
    >>> pg.mwu(x, y, tail='one-sided')
         U-val  tail    p-val    RBC    CLES
    MWU   97.0  less  0.00278  0.515  0.7575
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=False)

    # Check tails
    possible_tails = ['two-sided', 'one-sided', 'greater', 'less']
    assert tail in possible_tails, 'Invalid tail argument.'
    if tail == 'one-sided':
        # Detect the direction of the test based on the median
        tail = 'less' if np.median(x) < np.median(y) else 'greater'

    uval, pval = scipy.stats.mannwhitneyu(x, y, use_continuity=True,
                                          alternative=tail)

    # Effect size 1: Common Language Effect Size
    # CLES is tail-specific and calculated according to the formula given in
    # Vargha and Delaney 2000 which works with ordinal data.
    diff = x[:, None] - y
    # cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size
    # Tail = 'greater', with ties set to 0.5
    # Note that tail = 'two-sided' gives same output as tail = 'greater'
    cles = np.where(diff == 0, 0.5, diff > 0).mean()
    cles = 1 - cles if tail == 'less' else cles

    # Effect size 2: rank biserial correlation (Wendt 1972)
    rbc = 1 - (2 * uval) / diff.size  # diff.size = x.size * y.size

    # Fill output DataFrame
    stats = pd.DataFrame({
        'U-val': uval,
        'tail': tail,
        'p-val': pval,
        'RBC': rbc,
        'CLES': cles}, index=['MWU'])
    return _postprocess_dataframe(stats)
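The CLES line above is a brute-force evaluation of the Vargha and Delaney
formula CL = P(X > Y) + 0.5 * P(X = Y), which remains well defined for
ordinal data with ties. A small sketch on hypothetical ordinal ratings
(illustration only, not data from the docstrings):

import numpy as np

x = np.array([3, 4, 4, 5, 2])  # hypothetical ordinal ratings, group 1
y = np.array([2, 3, 4, 3, 1])  # hypothetical ordinal ratings, group 2
diff = x[:, None] - y          # all pairwise differences
cles = np.where(diff == 0, 0.5, diff > 0).mean()
print(cles)                    # P(X > Y) + 0.5 * P(X = Y) = 0.74
print(1 - cles)                # CLES reported when tail='less'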
Example #5
def mwu(x, y, tail='two-sided'):
    """Mann-Whitney U Test (= Wilcoxon rank-sum test). It is the non-parametric
    version of the independent T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be independent.
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    stats : pandas DataFrame
        Test summary ::

        'U-val' : U-value
        'p-val' : p-value
        'RBC'   : rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    Notes
    -----
    mwu tests the hypothesis that data in x and y are samples from continuous
    distributions with equal medians. The test assumes that x and y
    are independent. This test corrects for ties and by default
    uses a continuity correction (see :py:func:`scipy.stats.mannwhitneyu`
    for details).

    The rank biserial correlation is the difference between the proportion of
    favorable evidence and the proportion of unfavorable evidence
    (see Kerby 2014).

    The common language effect size is the probability (from 0 to 1) that a
    randomly selected observation from the first sample will be greater than a
    randomly selected observation from the second sample.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
           two random variables is stochastically larger than the other.
           The annals of mathematical statistics, 50-60.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    Examples
    --------
    >>> import numpy as np
    >>> from pingouin import mwu
    >>> np.random.seed(123)
    >>> x = np.random.uniform(low=0, high=1, size=20)
    >>> y = np.random.uniform(low=0.2, high=1.2, size=20)
    >>> mwu(x, y, tail='two-sided')
         U-val    p-val    RBC   CLES
    MWU   97.0  0.00556  0.515  0.758
    """
    from scipy.stats import mannwhitneyu
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=False)

    # Compute test
    if tail == 'one-sided':
        tail = 'less' if np.median(x) < np.median(y) else 'greater'
    uval, pval = mannwhitneyu(x, y, use_continuity=True, alternative=tail)

    # Effect size 1: common language effect size (McGraw and Wong 1992)
    diff = x[:, None] - y
    cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size

    # Effect size 2: rank biserial correlation (Wendt 1972)
    rbc = 1 - (2 * uval) / diff.size  # diff.size = x.size * y.size

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['MWU'])
    stats['U-val'] = round(uval, 3)
    stats['p-val'] = pval
    stats['RBC'] = round(rbc, 3)
    stats['CLES'] = round(cles, 3)

    col_order = ['U-val', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
Example #6
def wilcoxon(x, y, tail='two-sided'):
    """Wilcoxon signed-rank test. It is the non-parametric version of the
    paired T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be related (e.g.,
        repeated measures).
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    stats : pandas DataFrame
        Test summary ::

        'W-val' : W-value
        'p-val' : p-value
        'RBC'   : matched pairs rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    Notes
    -----
    The Wilcoxon signed-rank test tests the null hypothesis that two related
    paired samples come from the same distribution.
    A continuity correction is applied by default
    (see :py:func:`scipy.stats.wilcoxon` for details).

    The rank biserial correlation is the difference between the proportion of
    favorable evidence and the proportion of unfavorable evidence
    (see Kerby 2014).

    The common language effect size is the probability (from 0 to 1) that a
    randomly selected observation from the first sample will be greater than a
    randomly selected observation from the second sample.

    References
    ----------
    .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.
           Biometrics bulletin, 1(6), 80-83.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    Examples
    --------
    1. Wilcoxon test on two related samples.

    >>> import numpy as np
    >>> from pingouin import wilcoxon
    >>> x = [20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]
    >>> y = [38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]
    >>> wilcoxon(x, y, tail='two-sided')
              W-val     p-val    RBC   CLES
    Wilcoxon   20.5  0.285765  0.333  0.583
    """
    from scipy.stats import wilcoxon
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=True)

    # Compute test
    wval, pval = wilcoxon(x, y, zero_method='wilcox', correction=True)
    pval = pval * .5 if tail == 'one-sided' else pval

    # Effect size 1: common language effect size (McGraw and Wong 1992)
    diff = x[:, None] - y
    cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014)
    rank = np.arange(x.size, 0, -1)
    rsum = rank.sum()
    fav = rank[np.sign(y - x) > 0].sum()
    unfav = rank[np.sign(y - x) < 0].sum()
    rbc = fav / rsum - unfav / rsum

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['Wilcoxon'])
    stats['W-val'] = round(wval, 3)
    stats['p-val'] = pval
    stats['RBC'] = round(rbc, 3)
    stats['CLES'] = round(cles, 3)

    col_order = ['W-val', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
Example #7
def wilcoxon(x, y, tail='two-sided'):
    """Wilcoxon signed-rank test. It is the non-parametric version of the
    paired T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be related (e.g.,
        repeated measures).
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    stats : pandas DataFrame
        Test summary ::

        'W-val' : W-value
        'p-val' : p-value
        'RBC'   : matched pairs rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    Notes
    -----
    The Wilcoxon signed-rank test tests the null hypothesis that two related
    paired samples come from the same distribution.
    A continuity correction is applied by default
    (see :py:func:`scipy.stats.wilcoxon` for details).

    The rank biserial correlation is the difference between the proportion of
    favorable evidence and the proportion of unfavorable evidence
    (see Kerby 2014).

    The common language effect size is the probability (from 0 to 1) that a
    randomly selected observation from the first sample will be greater than a
    randomly selected observation from the second sample.

    .. warning :: Versions of Pingouin below 0.2.6 gave wrong two-sided
        p-values for the Wilcoxon test. P-values were accidentally squared, and
        therefore smaller. This issue has been resolved in Pingouin>=0.2.6.
        Make sure to always use the latest release.

    References
    ----------
    .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.
           Biometrics bulletin, 1(6), 80-83.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    Examples
    --------
    1. Wilcoxon test on two related samples.

    >>> import numpy as np
    >>> import pingouin as pg
    >>> x = [20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]
    >>> y = [38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]
    >>> pg.wilcoxon(x, y, tail='two-sided')
              W-val     p-val    RBC   CLES
    Wilcoxon   20.5  0.285765  0.333  0.583

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.wilcoxon(x, y, correction=True)
    WilcoxonResult(statistic=20.5, pvalue=0.2857652190231508)
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = remove_na(x, y, paired=True)

    # Compute test
    # TODO: scipy 1.3.0 adds an `alternative` argument, as in mannwhitneyu.
    # For now, keep it this way to stay compatible with older scipy versions.
    wval, pval = scipy.stats.wilcoxon(x,
                                      y,
                                      zero_method='wilcox',
                                      correction=True)
    pval = pval * .5 if tail == 'one-sided' else pval

    # Effect size 1: common language effect size (McGraw and Wong 1992)
    diff = x[:, None] - y
    cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014)
    rank = np.arange(x.size, 0, -1)
    rsum = rank.sum()
    fav = rank[np.sign(y - x) > 0].sum()
    unfav = rank[np.sign(y - x) < 0].sum()
    rbc = fav / rsum - unfav / rsum

    # Fill output DataFrame
    stats = pd.DataFrame({}, index=['Wilcoxon'])
    stats['W-val'] = round(wval, 3)
    stats['p-val'] = pval
    stats['RBC'] = round(rbc, 3)
    stats['CLES'] = round(cles, 3)

    col_order = ['W-val', 'p-val', 'RBC', 'CLES']
    stats = stats.reindex(columns=col_order)
    return stats
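Halving the two-sided p-value, as done above for ``tail='one-sided'``,
matches the directional p-value that newer SciPy versions return through the
``alternative`` argument mentioned in the TODO. A quick check with the
docstring data (assumes SciPy >= 1.3, where ``alternative`` is available):

import numpy as np
import scipy.stats

x = [20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]
y = [38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]
_, p_two = scipy.stats.wilcoxon(x, y, correction=True)
_, p_less = scipy.stats.wilcoxon(x, y, correction=True, alternative='less')
print(p_two, p_less)                  # ~0.285765 and ~0.142883
print(np.isclose(p_two / 2, p_less))  # True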
Example #8
def wilcoxon(x, y=None, alternative='two-sided', **kwargs):
    """
    Wilcoxon signed-rank test. It is the non-parametric version of the paired T-test.

    Parameters
    ----------
    x : array_like
        Either the first set of measurements
        (in which case y is the second set of measurements),
        or the differences between two sets of measurements
        (in which case y is not to be specified.) Must be one-dimensional.
    y : array_like
        Either the second set of measurements (if x is the first set of
        measurements), or not specified (if x is the differences between
        two sets of measurements.) Must be one-dimensional.
    alternative : string
        Defines the alternative hypothesis, or tail of the test. Must be one of
        "two-sided" (default), "greater" or "less". See :py:func:`scipy.stats.wilcoxon` for
        more details.
    **kwargs : dict
        Additional keyword arguments that are passed to :py:func:`scipy.stats.wilcoxon`.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'W-val'``: W-value
        * ``'alternative'``: tail of the test
        * ``'p-val'``: p-value
        * ``'RBC'``: matched pairs rank-biserial correlation (effect size)
        * ``'CLES'``: common language effect size

    See also
    --------
    scipy.stats.wilcoxon, mwu

    Notes
    -----
    The Wilcoxon signed-rank test [1]_ tests the null hypothesis that two
    related paired samples come from the same distribution. In particular,
    it tests whether the distribution of the differences x - y is symmetric
    about zero.

    .. important:: Pingouin automatically applies a continuity correction.
        Therefore, the p-values will be slightly different from those of
        :py:func:`scipy.stats.wilcoxon` unless ``correction=True`` is
        explicitly passed to the latter.

    In addition to the test statistic and p-values, Pingouin also computes two
    measures of effect size. The matched pairs rank biserial correlation [2]_
    is the simple difference between the proportion of favorable and
    unfavorable evidence; in the case of the Wilcoxon signed-rank test,
    the evidence consists of rank sums (Kerby 2014):

    .. math:: r = f - u

    The common language effect size is the proportion of pairs where ``x`` is
    higher than ``y``. It was first introduced by McGraw and Wong (1992) [3]_.
    Pingouin uses a brute-force version of the formula given by Vargha and
    Delaney 2000 [4]_:

    .. math:: \\text{CL} = P(X > Y) + .5 \\times P(X = Y)

    The advantages of this method are twofold. First, the brute-force
    approach pairs each observation of ``x`` to its ``y`` counterpart, and
    therefore does not require normally distributed data. Second, the formula
    takes ties into account and therefore works with ordinal data.

    When ``alternative`` is ``'less'``, the CLES is set to
    :math:`1 - \\text{CL}`, which gives the proportion of pairs where ``x``
    is *lower* than ``y``.

    References
    ----------
    .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.
           Biometrics bulletin, 1(6), 80-83.

    .. [2] Kerby, D. S. (2014). The simple difference formula: An approach to
           teaching nonparametric correlation. Comprehensive Psychology,
           3, 11-IT.

    .. [3] McGraw, K. O., & Wong, S. P. (1992). A common language effect size
           statistic. Psychological bulletin, 111(2), 361.

    .. [4] Vargha, A., & Delaney, H. D. (2000). A Critique and Improvement of
           the “CL” Common Language Effect Size Statistics of McGraw and Wong.
           Journal of Educational and Behavioral Statistics: A Quarterly
           Publication Sponsored by the American Educational Research
           Association and the American Statistical Association, 25(2),
           101–132. https://doi.org/10.2307/1165329

    Examples
    --------
    Wilcoxon test on two related samples.

    >>> import numpy as np
    >>> import pingouin as pg
    >>> x = np.array([20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13])
    >>> y = np.array([38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16])
    >>> pg.wilcoxon(x, y, alternative='two-sided')
              W-val alternative     p-val       RBC      CLES
    Wilcoxon   20.5   two-sided  0.285765 -0.378788  0.395833

    Same but using pre-computed differences. However, the CLES effect size
    cannot be computed as it requires the raw data.

    >>> pg.wilcoxon(x - y)
              W-val alternative     p-val       RBC  CLES
    Wilcoxon   20.5   two-sided  0.285765 -0.378788   NaN

    Compare with SciPy

    >>> import scipy
    >>> scipy.stats.wilcoxon(x, y)
    WilcoxonResult(statistic=20.5, pvalue=0.2661660677806492)

    The p-value is not exactly the same as Pingouin's. This is because Pingouin automatically
    applies a continuity correction. Disabling it gives the same p-value as SciPy:

    >>> pg.wilcoxon(x, y, alternative='two-sided', correction=False)
              W-val alternative     p-val       RBC      CLES
    Wilcoxon   20.5   two-sided  0.266166 -0.378788  0.395833

    One-sided test

    >>> pg.wilcoxon(x, y, alternative='greater')
              W-val alternative     p-val       RBC      CLES
    Wilcoxon   20.5     greater  0.876244 -0.378788  0.395833

    >>> pg.wilcoxon(x, y, alternative='less')
              W-val alternative     p-val       RBC      CLES
    Wilcoxon   20.5        less  0.142883 -0.378788  0.604167
    """
    x = np.asarray(x)
    if y is not None:
        y = np.asarray(y)
        x, y = remove_na(x, y, paired=True)  # Remove NA
    else:
        x = x[~np.isnan(x)]

    # Check tails
    assert alternative in ['two-sided', 'greater', 'less'], (
        "Alternative must be one of 'two-sided' (default), 'greater' or 'less'.")
    if "tail" in kwargs:
        raise ValueError(
            "Since Pingouin 0.4.0, the 'tail' argument has been renamed to 'alternative'.")

    # Compute test
    if "correction" not in kwargs:
        kwargs["correction"] = True
    wval, pval = scipy.stats.wilcoxon(x=x, y=y, alternative=alternative, **kwargs)

    # Effect size 1: Common Language Effect Size
    # Since Pingouin v0.3.5, CLES is tail-specific and calculated
    # according to the formula given in Vargha and Delaney 2000 which
    # works with ordinal data.
    if y is not None:
        diff = x[:, None] - y
        # cles = max((diff < 0).sum(), (diff > 0).sum()) / diff.size
        # alternative = 'greater', with ties set to 0.5
        # Note that alternative = 'two-sided' gives same output as alternative = 'greater'
        cles = np.where(diff == 0, 0.5, diff > 0).mean()
        cles = 1 - cles if alternative == 'less' else cles
    else:
        # CLES cannot be computed if y is None
        cles = np.nan

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014)
    if y is not None:
        d = x - y
        d = d[d != 0]
    else:
        d = x[x != 0]
    r = scipy.stats.rankdata(abs(d))
    rsum = r.sum()
    r_plus = np.sum((d > 0) * r)
    r_minus = np.sum((d < 0) * r)
    rbc = r_plus / rsum - r_minus / rsum

    # Fill output DataFrame
    stats = pd.DataFrame({
        'W-val': wval,
        'alternative': alternative,
        'p-val': pval,
        'RBC': rbc,
        'CLES': cles}, index=['Wilcoxon'])
    return _postprocess_dataframe(stats)
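Because extra keyword arguments are forwarded to
:py:func:`scipy.stats.wilcoxon`, other SciPy options can be requested
directly through this wrapper. A usage sketch (output not shown) asking for
the 'zsplit' zero-difference policy instead of SciPy's default 'wilcox':

import numpy as np
import pingouin as pg

x = np.array([20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13])
y = np.array([38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16])
# zero_method is passed through **kwargs to scipy.stats.wilcoxon.
stats = pg.wilcoxon(x, y, alternative='two-sided', zero_method='zsplit')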