Exemplo n.º 1
0
def ks_test(x, y=None, alt="two sided", exact=None, warn_for_ties=True):
    """Returns the statistic and probability from the Kolmogorov-Smirnov test.

    Arguments:
        - x, y: vectors of numbers whose distributions are to be compared.
        - alt: the alternative hypothesis, default is 2-sided. Accepted
          spellings are listed in the lo / hi / two tuples below.
        - exact: whether to compute the exact probability. If None, the
          exact computation is chosen when len(x) * len(y) < 1e4. It is
          only ever used for the 2-sided test on data without ties.
        - warn_for_ties: warns when values are tied. This should be left at
          True unless a monte carlo variant, like ks_boot, is being used.

    Returns the tuple (stat, Pval). With D = ECDF(x) - ECDF(y) evaluated at
    each distinct observed value, stat is max(|D|) for the 2-sided test,
    -min(D) for the "less" alternatives and max(D) for the "greater" ones.

    Raises:
        - NotImplementedError if y is None (1-sample case).
        - RuntimeError if alt is not a recognised alternative.

    Note the 1-sample cases are not implemented, although their cdf's are
    implemented in ks.py"""
    # translation from R 2.4
    lo = ("less", "lo", "lower", "l", "lt")
    hi = ("greater", "hi", "high", "h", "g", "gt")
    two = ("two sided", "2", 2, "two tailed", "two", "two.sided")

    if y is None:  # in anticipation of actually implementing the 1-sample cases
        raise NotImplementedError

    two_sided = alt in two
    if not (two_sided or alt in lo or alt in hi):
        raise RuntimeError("Unknown alt: %s" % (alt,))

    num_x = len(x)
    num_y = len(y)
    total = num_x + num_y
    # float() forces true division regardless of interpreter version
    n = num_x * num_y / float(total)

    # tag every observation with its sample of origin (0 -> x, 1 -> y);
    # list() is needed because zip returns an iterator on Python 3
    combined = array(
        list(zip(x, zeros(num_x, int))) + list(zip(y, ones(num_y, int))),
        dtype=[('stat', float), ('sample', int)])
    combined.sort(order='stat')

    # walking through the sorted values, step up by 1/num_x for an x value
    # and down by 1/num_y for a y value: the running sum is ECDF(x) - ECDF(y)
    scales = array([1.0 / num_x, -1.0 / num_y])
    cumsum = scales.take(combined['sample']).cumsum()

    # The ECDF difference is only meaningful after *all* copies of a value
    # have been consumed, so keep just the last element of each run of equal
    # values (as R's ks.test does). Comparing the values themselves, not the
    # (value, sample) pairs, also catches ties between the two samples.
    values = combined['stat']
    last_of_run = ones(total, bool)
    last_of_run[:-1] = values[1:] != values[:-1]
    ties = not last_of_run.all()
    cumsum = cumsum[last_of_run]

    if exact is None:
        exact = num_x * num_y < 1e4

    if two_sided:
        stat = fabs(cumsum).max()
    elif alt in lo:
        stat = -cumsum.min()
    else:
        stat = cumsum.max()

    if exact and two_sided and not ties:
        Pval = 1 - psmirnov2x(stat, num_x, num_y)
    elif two_sided:
        Pval = 1 - pkstwo(sqrt(n) * stat)
    else:
        Pval = exp(-2 * n * stat**2)

    if ties and warn_for_ties:
        warnings.warn("Cannot compute correct KS probability with ties")

    try:  # if numpy arrays were input, the Pval can be an array of len==1
        Pval = Pval[0]
    except (TypeError, IndexError):
        pass
    return stat, Pval
Exemplo n.º 2
0
 def test_ps2x(self):
     """psmirnov2x (2-sample, 2-sided) should reproduce values obtained from R"""
     # (statistic, first sample size, second sample size) -> expected value;
     # the last two rows differ only in the order of the sample sizes
     reference = (
         ((0.48, 20, 50), 0.9982277),
         ((0.28, 20, 50), 0.8161612),
         ((0.28, 50, 20), 0.8161612),
     )
     for arguments, expected in reference:
         self.assertFloatEqual(psmirnov2x(*arguments), expected)
Exemplo n.º 3
0
 def test_ps2x(self):
     """psmirnov2x (2-sample, 2-sided) should reproduce values obtained from R"""
     # each row: statistic, first sample size, second sample size, expected
     # value; the last two rows differ only in the order of the sample sizes
     rows = [
         (0.48, 20, 50, 0.9982277),
         (0.28, 20, 50, 0.8161612),
         (0.28, 50, 20, 0.8161612),
     ]
     for statistic, size_a, size_b, expected in rows:
         observed = psmirnov2x(statistic, size_a, size_b)
         self.assertFloatEqual(observed, expected)