示例#1
0
 def pearson(self):
     """
     Calculates the Pearson's product-moment coefficient between the first
     two samples returned by self.listSamples(), using the formula:

                 (N * sum_xy) - (sum_x * sum_y)
     --------------------------------------------------------------
     ((N * sum_x2 - (sum_x)**2) * (N * sum_y2 - (sum_y)**2)) ** 0.5

     If the two samples differ in row count, only the first N values of
     each are used, where N is the smaller of the two row counts.

     @return: Pearson's product-moment coefficient, as a float
     """
     names = self.listSamples()
     sample_x = self.sample[names[0]]
     sample_y = self.sample[names[1]]
     # Pair the observations up to the length of the shorter sample.
     n = min(sample_x.rowcount, sample_y.rowcount)
     indices = range(n)
     sum_x = summation([sample_x.data[i] for i in indices])
     sum_x2 = summation([sample_x.data[i] * sample_x.data[i]
                         for i in indices])
     sum_y = summation([sample_y.data[i] for i in indices])
     sum_y2 = summation([sample_y.data[i] * sample_y.data[i]
                         for i in indices])
     sum_xy = summation([sample_x.data[i] * sample_y.data[i]
                         for i in indices])
     covariance_term = (n * sum_xy) - (sum_x * sum_y)
     spread_x = (n * sum_x2) - (sum_x * sum_x)
     spread_y = (n * sum_y2) - (sum_y * sum_y)
     return float(covariance_term / ((spread_x * spread_y) ** 0.5))
示例#2
0
 def pearson(self):
     """
     Calculates the Pearson's product-moment coefficient between the first
     two samples returned by self.listSamples(), using the formula:

                 (N * sum_xy) - (sum_x * sum_y)
     --------------------------------------------------------------
     ((N * sum_x2 - (sum_x)**2) * (N * sum_y2 - (sum_y)**2)) ** 0.5

     If the two samples differ in row count, only the first N values of
     each are used, where N is the smaller of the two row counts.

     @return: Pearson's product-moment coefficient, as a float
     """
     names = self.listSamples()
     sample_x = self.sample[names[0]]
     sample_y = self.sample[names[1]]
     # Pair the observations up to the length of the shorter sample.
     n = min(sample_x.rowcount, sample_y.rowcount)
     indices = range(n)
     sum_x = summation([sample_x.data[i] for i in indices])
     sum_x2 = summation([sample_x.data[i] * sample_x.data[i]
                         for i in indices])
     sum_y = summation([sample_y.data[i] for i in indices])
     sum_y2 = summation([sample_y.data[i] * sample_y.data[i]
                         for i in indices])
     sum_xy = summation([sample_x.data[i] * sample_y.data[i]
                         for i in indices])
     covariance_term = (n * sum_xy) - (sum_x * sum_y)
     spread_x = (n * sum_x2) - (sum_x * sum_x)
     spread_y = (n * sum_y2) - (sum_y * sum_y)
     return float(covariance_term / ((spread_x * spread_y) ** 0.5))
示例#3
0
def Bray_Curtis(original, test):
    """
    Bray-Curtis Distance is distance measure for interval or ratio data.

    Computed as Manhattan(original, test) divided by the sum of
    summation(original) and summation(test).

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland forest
    communities of southern Wisconsin. Ecological Monographs 27: 325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Bray-Curtis distance between original and test
    @raise DistanceInputSizeError: if original and test differ in length
    """
    if len(original) != len(test):
        # Adjacent string literals are used instead of a backslash
        # continuation inside the string, which would embed the next
        # line's indentation in the error message.
        raise DistanceInputSizeError("Size (length) of inputs must be "
                                     "equal for Bray-Curtis distance")
    total = summation(original) + summation(test)
    # NOTE(review): the distance is undefined when total is zero (e.g. both
    # inputs are all zeros); the division below is left to fail in that case.
    return Manhattan(original, test) / total
示例#4
0
def Bray_Curtis(original, test):
    """
    Bray-Curtis Distance is distance measure for interval or ratio data.

    Computed as Manhattan(original, test) divided by the sum of
    summation(original) and summation(test).

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland forest
    communities of southern Wisconsin. Ecological Monographs 27: 325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Bray-Curtis distance between original and test
    @raise DistanceInputSizeError: if original and test differ in length
    """
    if len(original) != len(test):
        # Adjacent string literals are used instead of a backslash
        # continuation inside the string, which would embed the next
        # line's indentation in the error message.
        raise DistanceInputSizeError("Size (length) of inputs must be "
                                     "equal for Bray-Curtis distance")
    total = summation(original) + summation(test)
    # NOTE(review): the distance is undefined when total is zero (e.g. both
    # inputs are all zeros); the division below is left to fail in that case.
    return Manhattan(original, test) / total
示例#5
0
def SpearmanCorrelation(**kwargs):
    """
    Test 58: Spearman rank correlation test (paired observations)
    To investigate the significance of the correlation between two series of
    observations obtained in pairs.

    The test statistic is computed as:

            (6 * R) - (ssize * (ssize**2 - 1))
        -----------------------------------------
        ssize * (ssize + 1) * sqrt(ssize - 1)

    and is evaluated against NormalDistribution() via test().

    Limitations:
    1. Assumes the two population distributions to be continuous
    2. Sample size must be more than 10

    @param R: sum of squared ranks differences
    @param ssize: sample size
    @param series1: ranks of series #1 (not used if R is given)
    @param series2: ranks of series #2 (not used if R is given)
    @param confidence: confidence level
    @return: result of test() on the statistic at the given confidence

    @see: Ling, MHT. 2009. Ten Z-Test Routines from Gopal Kanji's 100
    Statistical Tests. The Python Papers Source Codes 1:5
    """
    ssize = kwargs['ssize']
    # dict.has_key() no longer exists in Python 3; the "in" operator is
    # equivalent and works on both Python 2 and Python 3.
    if 'R' in kwargs:
        R = kwargs['R']
    else:
        series1 = kwargs['series1']
        series2 = kwargs['series2']
        # Indexed by series1's length so that a shorter series2 still
        # fails loudly instead of being silently truncated.
        R = summation([(series1[i] - series2[i]) ** 2
                       for i in range(len(series1))])
    numerator = (6.0 * R) - (ssize * ((ssize ** 2) - 1.0))
    denominator = ssize * (ssize + 1.0) * sqrt(ssize - 1.0)
    statistic = numerator / denominator
    return test(statistic, NormalDistribution(), kwargs['confidence'])
示例#6
0
def SpearmanCorrelation(**kwargs):
    """
    Test 58: Spearman rank correlation test (paired observations)
    To investigate the significance of the correlation between two series of
    observations obtained in pairs.

    The test statistic is computed as:

            (6 * R) - (ssize * (ssize**2 - 1))
        -----------------------------------------
        ssize * (ssize + 1) * sqrt(ssize - 1)

    and is evaluated against NormalDistribution() via test().

    Limitations:
    1. Assumes the two population distributions to be continuous
    2. Sample size must be more than 10

    @param R: sum of squared ranks differences
    @param ssize: sample size
    @param series1: ranks of series #1 (not used if R is given)
    @param series2: ranks of series #2 (not used if R is given)
    @param confidence: confidence level
    @return: result of test() on the statistic at the given confidence

    @see: Ling, MHT. 2009. Ten Z-Test Routines from Gopal Kanji's 100
    Statistical Tests. The Python Papers Source Codes 1:5
    """
    ssize = kwargs['ssize']
    # dict.has_key() no longer exists in Python 3; the "in" operator is
    # equivalent and works on both Python 2 and Python 3.
    if 'R' in kwargs:
        R = kwargs['R']
    else:
        series1 = kwargs['series1']
        series2 = kwargs['series2']
        # Indexed by series1's length so that a shorter series2 still
        # fails loudly instead of being silently truncated.
        R = summation([(series1[i] - series2[i]) ** 2
                       for i in range(len(series1))])
    numerator = (6.0 * R) - (ssize * ((ssize ** 2) - 1.0))
    denominator = ssize * (ssize + 1.0) * sqrt(ssize - 1.0)
    statistic = numerator / denominator
    return test(statistic, NormalDistribution(), kwargs['confidence'])