Пример #1
0
    def test_zprob(self):
        """zprob should match twice the z_high probability for abs(z)"""
        # Tabulated reference probabilities; each one is doubled below to
        # form the value zprob is expected to return.
        reference = (
            5.000000e-01, 4.960106e-01, 4.601722e-01, 3.085375e-01,
            1.586553e-01, 2.275013e-02, 2.866516e-07, 7.619853e-24,
            2.753624e-89, 4.906714e-198, 0.000000e+00, 0.000000e+00,
        )
        expected = [2 * prob for prob in reference]

        # The same expectations are checked against both fixtures, first
        # self.values and then self.negvalues. zip() stops at the shorter
        # sequence, so any surplus entries on either side are not compared.
        for z_scores in (self.values, self.negvalues):
            for z, p in zip(z_scores, expected):
                # atol=10e-7 is an absolute tolerance of 1e-6.
                np.testing.assert_allclose(zprob(z), p, atol=10e-7)
Пример #2
0
    def test_zprob(self):
        """zprob should match twice the z_high probability for abs(z)"""
        # Reference z_high probabilities, one per entry of the fixtures.
        z_high_probs = [
            5.000000e-01, 4.960106e-01, 4.601722e-01, 3.085375e-01,
            1.586553e-01, 2.275013e-02, 2.866516e-07, 7.619853e-24,
            2.753624e-89, 4.906714e-198, 0.000000e+00, 0.000000e+00,
        ]
        probs = [2 * value for value in z_high_probs]
        tolerance = 10e-7  # i.e. 1e-6, applied as an absolute tolerance

        # Positive fixture first, then the negated one; both are compared
        # against the same doubled probabilities, pairwise via zip().
        for fixture in (self.values, self.negvalues):
            for z, p in zip(fixture, probs):
                np.testing.assert_allclose(zprob(z), p, atol=tolerance)
Пример #3
0
def mw_t(x, y):
    """Compute the Mann-Whitney U statistic and a normal-approximation p.

    Both samples are pooled, sorted by value and ranked; runs of equal
    values receive the rank returned by ``_average_rank``. U is derived from
    the rank sum of the first (shorter) sample, turned into a z score using
    a tie-adjusted variance, and that z score is handed to ``zprob``.

    Parameters
    ----------
    x, y : sequence of numbers
        The two samples. They are swapped internally when ``x`` is longer
        than ``y`` so that the shorter sample is always processed as ``x``.

    Returns
    -------
    U : number
        The larger of the two U statistics (``U1`` and ``prod - U1``).
    p : number
        The value of ``zprob(z)`` for the computed z score.
    """
    if len(x) > len(y):
        x, y = y, x

    num_x = len(x)
    num_y = len(y)

    # Tag every observation with its sample of origin (0 for x, 1 for y) and
    # a placeholder rank of 0. The zips are materialised as lists so they
    # can be concatenated: on Python 3 ``zip`` returns an iterator, which
    # does not support ``+``.
    x = list(zip(x, np.zeros(num_x, int), np.zeros(num_x, int)))
    y = list(zip(y, np.ones(num_y, int), np.zeros(num_y, int)))
    combined = np.array(x + y, dtype=[('stat', float), ('sample', int),
                                      ('rank', float)])
    combined.sort(order='stat')

    prev = None
    # While inside a run of equal values, ``start`` holds the index of the
    # run's second element (which equals the 1-based rank of its first).
    start = None
    # Accumulates (t**3 - t) over every tie run of length t.
    T = 0.0
    for index in range(combined.shape[0]):
        value = combined['stat'][index]
        if value == prev and start is None:
            # A tie run begins; ranks for it are filled in once it ends.
            start = index
            continue

        if value != prev and start is not None:
            # The run ended at index - 1: overwrite its provisional ranks.
            ave_rank = _average_rank(start, index)
            num_tied = index - start + 1
            T += (num_tied ** 3 - num_tied)
            combined['rank'][start - 1:index] = ave_rank
            start = None
        combined['rank'][index] = index + 1
        prev = value

    if start is not None:
        # The data ended in the middle of a tie run; close it out here.
        # NOTE(review): this run covers 0-based positions start-1..index,
        # i.e. 1-based ranks start..index+1, yet the helper receives
        # (start, index) exactly as in the in-loop case where ``index`` is
        # one past the run. Whether the last rank is left out of the average
        # depends on _average_rank's bounds, which are not visible here --
        # confirm against the helper before changing.
        ave_rank = _average_rank(start, index)
        num_tied = index - start + 2
        T += (num_tied ** 3 - num_tied)
        combined['rank'][start - 1:index + 1] = ave_rank

    total = combined.shape[0]
    # Builtin sum instead of np.sum: passing a generator to np.sum is
    # deprecated, and NumPy only falls back to the builtin for it anyway.
    x_ranks_sum = sum(rank for rank, sample
                      in zip(combined['rank'], combined['sample'])
                      if sample == 0)
    prod = num_x * num_y
    U1 = prod + (num_x * (num_x + 1) / 2) - x_ranks_sum
    U2 = prod - U1
    U = max([U1, U2])
    numerator = U - prod / 2
    # The variance term is reduced by T to account for tied ranks.
    denominator = np.sqrt((prod / (total * (total - 1))) *
                          ((total ** 3 - total - T) / 12))
    z = (numerator / denominator)
    p = zprob(z)
    return U, p