Пример #1
0
def calc_spearman_t(r,n,tails='two-tailed',eps=1e-10):
    """Calculate the t value, and probability for Spearman correlation

    Equation from Wikipedia article on Spearman rank correlation:
    http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient

    The statistic is t = r * sqrt((n - 2) / (1 - r**2)) and is evaluated
    with n - 2 degrees of freedom.

    Parameters:
        r:     the correlation coefficient.
        n:     the count from which the n - 2 degrees of freedom are derived.
        tails: 'two-tailed' (uses tprob), 'high' (uses t_high) or
               'low' (uses t_low).
        eps:   amount by which r is moved toward zero when it is exactly
               1.0 or -1.0.

    Returns the probability computed by the selected tail function.

    Raises RuntimeError if tails is not one of the three valid names.

    TODO:  verify with additional / published sources.
    TODO:  want to find the correct handling for the
    case of perfect correlation
    """
    # A perfect correlation in either direction makes 1 - r**2 exactly
    # zero.  Nudge r toward zero by an arbitrarily tiny epsilon so the
    # division below stays finite; the sign of r (and hence of t) is kept.
    if r == 1.0:
        r = r - eps
    elif r == -1.0:
        r = r + eps

    t = r*(((n-2)/(1.0-r**2))**0.5)

    if tails == 'two-tailed':
        prob = tprob(t,n-2)
    elif tails == 'high':
        prob = t_high(t,n-2)
    elif tails == 'low':
        prob = t_low(t,n-2)
    else:
        raise RuntimeError("Valid prob. methods are 'two-tailed','high',and 'low'")

    return prob
Пример #2
0
def calc_spearman_t(r, n, tails='two-tailed', eps=1e-10):
    """Calculate the t value, and probability for Spearman correlation

    Equation from Wikipedia article on Spearman rank correlation:
    http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient

    The statistic is t = r * sqrt((n - 2) / (1 - r**2)) and is evaluated
    with n - 2 degrees of freedom.

    Parameters:
        r:     the correlation coefficient.
        n:     the count from which the n - 2 degrees of freedom are derived.
        tails: 'two-tailed' (uses tprob), 'high' (uses t_high) or
               'low' (uses t_low).
        eps:   amount by which r is moved toward zero when it is exactly
               1.0 or -1.0.

    Returns the probability computed by the selected tail function.

    Raises RuntimeError if tails is not one of the three valid names.

    TODO:  verify with additional / published sources.
    TODO:  want to find the correct handling for the
    case of perfect correlation
    """
    # A perfect correlation in either direction makes 1 - r**2 exactly
    # zero.  Nudge r toward zero by an arbitrarily tiny epsilon so the
    # division below stays finite; the sign of r (and hence of t) is kept.
    if r == 1.0:
        r = r - eps
    elif r == -1.0:
        r = r + eps

    t = r * (((n - 2) / (1.0 - r**2))**0.5)

    if tails == 'two-tailed':
        prob = tprob(t, n - 2)
    elif tails == 'high':
        prob = t_high(t, n - 2)
    elif tails == 'low':
        prob = t_low(t, n - 2)
    else:
        raise RuntimeError(
            "Valid prob. methods are 'two-tailed','high',and 'low'")

    return prob
Пример #3
0
def t_tailed_prob(t, df, tails):
    """Return the p-value for t with df degrees of freedom, chosen by tails.

    tails == 'high' uses t_high, tails == 'low' uses t_low, and every
    other value of tails falls back to tprob.
    """
    # Pick the tail function first, then apply it once.
    if tails == 'high':
        tail_func = t_high
    elif tails == 'low':
        tail_func = t_low
    else:
        tail_func = tprob
    return tail_func(t, df)
Пример #4
0
def correlation(x_items, y_items):
    """Return (r, prob): the Pearson correlation of x and y and its significance.

    WARNING: x_items and y_items must be same length!

    With fewer than three items in x_items the probability is reported
    as 1.  If computing the t statistic raises ZeroDivisionError or
    FloatingPointError the probability is reported as 0.
    """
    r = pearson(x_items, y_items)
    n = len(x_items)

    # Too few points to test: report no significance.
    if n < 3:
        return (r, 1)

    df = n - 2
    try:
        unexplained = (1 - (r*r))/df
        prob = tprob(r/sqrt(unexplained), df)
    except (ZeroDivisionError, FloatingPointError):
        # r was presumably 1, which makes the denominator zero.
        prob = 0
    return (r, prob)
Пример #5
0
    def test_tprob(self):
        """tprob should match twice the t_high probability for abs(t)

        The table below holds the undoubled reference values, keyed by
        degrees of freedom; each one is doubled before being compared
        with tprob at the corresponding entry of self.values.
        """
        half_probs = {
            1: [
                0.500000000, 0.496817007, 0.468274483, 0.352416382,
                0.250000000, 0.147583618, 0.062832958, 0.031725517,
                0.015902251, 0.010606402, 0.006365349, 0.001591536,
            ],
            10: [
                5.000000e-01, 4.961090e-01, 4.611604e-01, 3.139468e-01,
                1.704466e-01, 3.669402e-02, 2.686668e-04, 7.947766e-07,
                1.073031e-09, 1.980896e-11, 1.237155e-13, 1.200254e-19,
            ],
            100: [
                5.000000e-01, 4.960206e-01, 4.602723e-01, 3.090868e-01,
                1.598621e-01, 2.410609e-02, 1.225087e-06, 4.950844e-17,
                4.997134e-37, 4.190166e-52, 7.236082e-73, 2.774197e-132,
            ],
        }
        for df in self.df:
            expected = [2*half for half in half_probs[df]]
            for t_value, exp_prob in zip(self.values, expected):
                self.assertFloatEqualRel(tprob(t_value, df), exp_prob,
                                         eps=1e-4)