def pearson(self):
    """
    Calculates the Pearson's product-moment coefficient between the first
    two samples named by self.listSamples(), using the formula:

                      (N * sum_xy) - (sum_x * sum_y)
    --------------------------------------------------------------
    ((N * sum_x2 - (sum_x)**2) * (N * sum_y2 - (sum_y)**2)) ** 0.5

    N is the smaller of the two samples' rowcount values, so only the
    first N entries of each sample's data take part in the calculation.
    No guard is applied for a zero denominator.

    @return: the coefficient, converted with float()
    """
    names = self.listSamples()
    first = self.sample[names[0]]
    second = self.sample[names[1]]
    # Pair the observations only up to the shorter sample.
    n = min(first.rowcount, second.rowcount)
    xs = [first.data[i] for i in range(n)]
    ys = [second.data[i] for i in range(n)]
    sum_x = summation(xs)
    sum_x2 = summation([x * x for x in xs])
    sum_y = summation(ys)
    sum_y2 = summation([y * y for y in ys])
    sum_xy = summation([x * y for x, y in zip(xs, ys)])
    top = (n * sum_xy) - (sum_x * sum_y)
    spread_x = (n * sum_x2) - (sum_x * sum_x)
    spread_y = (n * sum_y2) - (sum_y * sum_y)
    return float(top / ((spread_x * spread_y) ** 0.5))
def pearson(self):
    """
    Calculates the Pearson's product-moment coefficient between the first
    two samples named by self.listSamples(), using the formula:

                      (N * sum_xy) - (sum_x * sum_y)
    --------------------------------------------------------------
    ((N * sum_x2 - (sum_x)**2) * (N * sum_y2 - (sum_y)**2)) ** 0.5

    N is the smaller of the two samples' rowcount values, so only the
    first N entries of each sample's data take part in the calculation.
    No guard is applied for a zero denominator.

    @return: the coefficient, converted with float()
    """
    names = self.listSamples()
    sample_x = self.sample[names[0]]
    sample_y = self.sample[names[1]]
    # Pair the observations only up to the shorter sample.
    count = min(sample_x.rowcount, sample_y.rowcount)
    x_values = [sample_x.data[i] for i in range(count)]
    y_values = [sample_y.data[i] for i in range(count)]
    sum_x = summation(x_values)
    sum_x2 = summation([x * x for x in x_values])
    sum_y = summation(y_values)
    sum_y2 = summation([y * y for y in y_values])
    sum_xy = summation([x * y for x, y in zip(x_values, y_values)])
    numerator = (count * sum_xy) - (sum_x * sum_y)
    variation_x = (count * sum_x2) - (sum_x * sum_x)
    variation_y = (count * sum_y2) - (sum_y * sum_y)
    return float(numerator / ((variation_x * variation_y) ** 0.5))
def Bray_Curtis(original, test):
    """
    Bray-Curtis Distance is a distance measure for interval or ratio data.

    Computed as Manhattan(original, test) divided by the combined
    summation of both inputs. No guard is applied when that combined
    summation is zero.

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland forest
    communities of S. Wisconsin. Ecological Monographs 27: 325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Manhattan distance divided by the combined summation
    @raise DistanceInputSizeError: if the two inputs differ in length
    """
    if len(original) != len(test):
        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which leaked a stray backslash/whitespace
        # into the message text.
        raise DistanceInputSizeError("Size (length) of inputs must be "
                                     "equal for Bray-Curtis distance")
    # float() forces true division so that integer inputs are not
    # truncated by Python 2's integer '/' operator.
    total = float(summation(original) + summation(test))
    return Manhattan(original, test) / total
def SpearmanCorrelation(**kwargs):
    """
    Test 58: Spearman rank correlation test (paired observations)

    To investigate the significance of the correlation between two series
    of observations obtained in pairs.

    The statistic passed to the test is

        (6 * R) - (ssize * (ssize**2 - 1))
        -------------------------------------
        ssize * (ssize + 1) * sqrt(ssize - 1)

    Limitations:
        1. Assumes the two population distributions to be continuous
        2. Sample size must be more than 10

    @param R: sum of squared ranks differences
    @param ssize: sample size
    @param series1: ranks of series #1 (not used if R is given)
    @param series2: ranks of series #2 (not used if R is given)
    @param confidence: confidence level
    @return: whatever test() returns for the statistic against
    NormalDistribution() at the given confidence

    @see: Ling, MHT. 2009. Ten Z-Test Routines from Gopal Kanji's 100
    Statistical Tests. The Python Papers Source Codes 1:5
    """
    ssize = kwargs['ssize']
    # The 'in' operator replaces dict.has_key(), which no longer exists
    # in Python 3; 'in' behaves identically on Python 2.
    if 'R' in kwargs:
        R = kwargs['R']
    else:
        series1 = kwargs['series1']
        series2 = kwargs['series2']
        # Indexed by len(series1) (not zip) so a shorter series2 still
        # raises IndexError instead of being silently truncated.
        R = summation([(series1[i] - series2[i]) ** 2
                       for i in range(len(series1))])
    statistic = (6.0 * R) - (ssize * ((ssize ** 2) - 1.0))
    statistic = statistic / (ssize * (ssize + 1.0) * sqrt(ssize - 1.0))
    return test(statistic, NormalDistribution(), kwargs['confidence'])
def SpearmanCorrelation(**kwargs):
    """
    Test 58: Spearman rank correlation test (paired observations)

    To investigate the significance of the correlation between two series
    of observations obtained in pairs.

    The statistic passed to the test is

        (6 * R) - (ssize * (ssize**2 - 1))
        -------------------------------------
        ssize * (ssize + 1) * sqrt(ssize - 1)

    Limitations:
        1. Assumes the two population distributions to be continuous
        2. Sample size must be more than 10

    @param R: sum of squared ranks differences
    @param ssize: sample size
    @param series1: ranks of series #1 (not used if R is given)
    @param series2: ranks of series #2 (not used if R is given)
    @param confidence: confidence level
    @return: whatever test() returns for the statistic against
    NormalDistribution() at the given confidence

    @see: Ling, MHT. 2009. Ten Z-Test Routines from Gopal Kanji's 100
    Statistical Tests. The Python Papers Source Codes 1:5
    """
    ssize = kwargs['ssize']
    # The 'in' operator replaces dict.has_key(), which no longer exists
    # in Python 3; 'in' behaves identically on Python 2.
    if 'R' in kwargs:
        R = kwargs['R']
    else:
        series1 = kwargs['series1']
        series2 = kwargs['series2']
        # Indexed by len(series1) (not zip) so a shorter series2 still
        # raises IndexError instead of being silently truncated.
        R = summation([(series1[i] - series2[i]) ** 2
                       for i in range(len(series1))])
    statistic = (6.0 * R) - (ssize * ((ssize ** 2) - 1.0))
    statistic = statistic / (ssize * (ssize + 1.0) * sqrt(ssize - 1.0))
    return test(statistic, NormalDistribution(), kwargs['confidence'])