def ttest_ind(a, b, axis=0):
    """Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores
    a, and b, using a pooled (equal-variance) estimate of the variance.
    From Numerical Recipies, p.483.

    Parameters
    ----------
    a, b : array_like
        The two samples. Masked arrays are supported: masked entries are
        excluded from the per-sample observation counts.
    axis : int or None, optional
        The axis over which to operate on a and b. None means "ravel array
        first"; that handling is delegated to ``_chk2_asarray``
        (NOTE(review): helper not visible here -- confirm).

    Returns
    -------
    t : float or ndarray
        The t-value. Wherever the pooled variance is exactly zero the
        t-value is set to 1.0 instead of NaN/inf.
    probs : float or ndarray
        The two-tailed p-value, obtained from ``stats.betai``
        (presumably the regularized incomplete beta function -- confirm).
    """
    a, b, axis = _chk2_asarray(a, b, axis)

    x1 = np.average(a, axis)
    x2 = np.average(b, axis)

    # .var() defaults to ddof=0, i.e. the *population* variance.
    v1 = a.var(axis)
    v2 = b.var(axis)

    # Number of observations per sample; masked entries do not count.
    if np.ma.getmask(a).any():
        n1 = a.shape[axis] - a.mask.sum(axis)
    else:
        n1 = a.shape[axis]

    if np.ma.getmask(b).any():
        n2 = b.shape[axis] - b.mask.sum(axis)
    else:
        n2 = b.shape[axis]

    df = n1 + n2 - 2

    # Pooled variance = (sum of squared deviations of both samples) / df.
    # With ddof=0 variances the sum of squared deviations is n * var, which
    # equals (n - 1) * unbiased variance.  Weighting the ddof=0 variance by
    # (n - 1), as was done previously, underestimates the pooled variance and
    # inflates |t|.  Using n * var also stays finite for a single-observation
    # sample, where the unbiased variance would be 0/0.
    svar = (n1 * v1 + n2 * v2) / df

    zerodivproblem = svar == 0
    # Division by zero is expected when svar == 0; those entries are
    # overwritten just below, so keep numpy quiet about them.
    with np.errstate(divide='ignore', invalid='ignore'):
        t = (x1 - x2) / np.sqrt(svar * (1.0 / n1 + 1.0 / n2))  # N-D COMPUTATION HERE!!!!!!
    t = np.where(zerodivproblem, 1.0, t)  # replace NaN t-values with 1.0

    probs = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    if not np.isscalar(t):
        probs = probs.reshape(t.shape)
    # Unwrap a length-1 result so callers get a plain value.
    if not np.isscalar(probs) and len(probs) == 1:
        probs = probs[0]
    return t, probs
def ttest_ind(a, b, axis=0):
    """Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores
    a, and b, using a pooled (equal-variance) estimate of the variance.
    From Numerical Recipies, p.483.

    Parameters
    ----------
    a, b : array_like
        The two samples. Masked arrays are supported: masked entries are
        excluded from the per-sample observation counts.
    axis : int or None, optional
        The axis over which to operate on a and b. None means "ravel array
        first"; that handling is delegated to ``_chk2_asarray``
        (NOTE(review): helper not visible here -- confirm).

    Returns
    -------
    t : float or ndarray
        The t-value. Wherever the pooled variance is exactly zero the
        t-value is set to 1.0 instead of NaN/inf.
    probs : float or ndarray
        The two-tailed p-value, obtained from ``stats.betai``
        (presumably the regularized incomplete beta function -- confirm).
    """
    a, b, axis = _chk2_asarray(a, b, axis)

    x1 = np.average(a, axis)
    x2 = np.average(b, axis)

    # .var() defaults to ddof=0, i.e. the *population* variance.
    v1 = a.var(axis)
    v2 = b.var(axis)

    # Number of observations per sample; masked entries do not count.
    if np.ma.getmask(a).any():
        n1 = a.shape[axis] - a.mask.sum(axis)
    else:
        n1 = a.shape[axis]

    if np.ma.getmask(b).any():
        n2 = b.shape[axis] - b.mask.sum(axis)
    else:
        n2 = b.shape[axis]

    df = n1 + n2 - 2

    # Pooled variance = (sum of squared deviations of both samples) / df.
    # With ddof=0 variances the sum of squared deviations is n * var, which
    # equals (n - 1) * unbiased variance.  Weighting the ddof=0 variance by
    # (n - 1), as was done previously, underestimates the pooled variance and
    # inflates |t|.  Using n * var also stays finite for a single-observation
    # sample, where the unbiased variance would be 0/0.
    svar = (n1 * v1 + n2 * v2) / df

    zerodivproblem = svar == 0
    # Division by zero is expected when svar == 0; those entries are
    # overwritten just below, so keep numpy quiet about them.
    with np.errstate(divide='ignore', invalid='ignore'):
        t = (x1 - x2) / np.sqrt(
            svar * (1.0 / n1 + 1.0 / n2))  # N-D COMPUTATION HERE!!!!!!
    t = np.where(zerodivproblem, 1.0, t)  # replace NaN t-values with 1.0

    probs = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    if not np.isscalar(t):
        probs = probs.reshape(t.shape)
    # Unwrap a length-1 result so callers get a plain value.
    if not np.isscalar(probs) and len(probs) == 1:
        probs = probs[0]
    return t, probs
def approxrand(a, b, **kwargs):
    """
    Returns an approximate significance level between two lists of
    independently generated test values.

    Approximate randomization calculates significance by randomly drawing
    from a sample of the possible permutations. At the limit of the number
    of possible permutations, the significance level is exact. The
    approximate significance level is the sample mean number of times the
    statistic of the permutated lists varies from the actual statistic of
    the unpermuted argument lists.

    Recognised keyword arguments: C{shuffles} (number of random shuffles,
    default 999, capped at the number of possible permutations),
    C{statistic} (callable applied to each list, default arithmetic mean)
    and C{verbose} (print progress, default False).

    @return: a tuple containing an approximate significance level, the count
             of the number of times the pseudo-statistic varied from the
             actual statistic, and the number of shuffles. The count is a
             float that starts at 1e-100 rather than 0.
    @rtype: C{tuple}
    @param a: a list of test values
    @type a: C{list}
    @param b: another list of independently generated test values
    @type b: C{list}
    """
    shuffles = kwargs.get('shuffles', 999)
    n_a = len(a)
    total = n_a + len(b)

    # there's no point in trying to shuffle beyond all possible permutations.
    # Only min(shuffles, total!) is needed, so stop multiplying as soon as the
    # running product reaches the requested count instead of building the
    # complete (potentially enormous) factorial.  For total <= 1 the product
    # stays 1, which also covers empty input.
    permutations = 1
    k = 1
    while k < total and permutations < shuffles:
        k += 1
        permutations *= k
    shuffles = min(shuffles, permutations)

    stat = kwargs.get('statistic', lambda lst: float(sum(lst)) / len(lst))
    verbose = kwargs.get('verbose', False)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    if verbose:
        print('shuffles: %d' % shuffles)

    actual_stat = math.fabs(stat(a) - stat(b))

    if verbose:
        print('actual statistic: %f' % actual_stat)
        print('-' * 60)

    # Starts at a tiny positive value instead of 0 -- presumably so betai()
    # below never receives an exact zero; confirm before changing.
    c = 1e-100
    lst = LazyConcatenation([a, b])
    # A real list is required: random.shuffle works in place.
    indices = list(range(total))

    for i in range(shuffles):
        if verbose and i % 10 == 0:
            print('shuffle: %d' % i)

        random.shuffle(indices)

        # The first n_a shuffled positions play the role of `a`, the rest `b`.
        pseudo_stat_a = stat(LazyMap(lambda idx: lst[idx], indices[:n_a]))
        pseudo_stat_b = stat(LazyMap(lambda idx: lst[idx], indices[n_a:]))
        pseudo_stat = math.fabs(pseudo_stat_a - pseudo_stat_b)

        if pseudo_stat >= actual_stat:
            c += 1

        if verbose and i % 10 == 0:
            print('pseudo-statistic: %f' % pseudo_stat)
            print('significance: %f' % (float(c + 1) / (i + 1)))
            print('-' * 60)

    significance = float(c + 1) / (shuffles + 1)

    if verbose:
        print('significance: %f' % significance)
        # betai comes from outside this function and may be falsy when it is
        # unavailable, in which case the extra report is skipped.
        if betai:
            for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
                print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi)))

    return (significance, c, shuffles)
def approxrand(a, b, **kwargs):
    """
    Returns an approximate significance level between two lists of
    independently generated test values.

    Approximate randomization calculates significance by randomly drawing
    from a sample of the possible permutations. At the limit of the number
    of possible permutations, the significance level is exact. The
    approximate significance level is the sample mean number of times the
    statistic of the permutated lists varies from the actual statistic of
    the unpermuted argument lists.

    Recognised keyword arguments: ``shuffles`` (number of random shuffles,
    default 999, capped at the number of possible permutations),
    ``statistic`` (callable applied to each list, default arithmetic mean)
    and ``verbose`` (print progress, default False).

    :return: a tuple containing an approximate significance level, the count
             of the number of times the pseudo-statistic varied from the
             actual statistic, and the number of shuffles. The count is a
             float that starts at 1e-100 rather than 0.
    :rtype: tuple
    :param a: a list of test values
    :type a: list
    :param b: another list of independently generated test values
    :type b: list
    """
    shuffles = kwargs.get('shuffles', 999)
    n_a = len(a)
    total = n_a + len(b)

    # there's no point in trying to shuffle beyond all possible permutations.
    # Only min(shuffles, total!) is needed, so stop multiplying as soon as the
    # running product reaches the requested count instead of building the
    # complete (potentially enormous) factorial.  For total <= 1 the product
    # stays 1, which also covers empty input.
    permutations = 1
    k = 1
    while k < total and permutations < shuffles:
        k += 1
        permutations *= k
    shuffles = min(shuffles, permutations)

    stat = kwargs.get('statistic', lambda lst: float(sum(lst)) / len(lst))
    verbose = kwargs.get('verbose', False)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    if verbose:
        print('shuffles: %d' % shuffles)

    actual_stat = fabs(stat(a) - stat(b))

    if verbose:
        print('actual statistic: %f' % actual_stat)
        print('-' * 60)

    # Starts at a tiny positive value instead of 0 -- presumably so betai()
    # below never receives an exact zero; confirm before changing.
    c = 1e-100
    lst = LazyConcatenation([a, b])
    # A real list is required: shuffle works in place.
    indices = list(range(total))

    for i in range(shuffles):
        if verbose and i % 10 == 0:
            print('shuffle: %d' % i)

        shuffle(indices)

        # The first n_a shuffled positions play the role of `a`, the rest `b`.
        pseudo_stat_a = stat(LazyMap(lambda idx: lst[idx], indices[:n_a]))
        pseudo_stat_b = stat(LazyMap(lambda idx: lst[idx], indices[n_a:]))
        pseudo_stat = fabs(pseudo_stat_a - pseudo_stat_b)

        if pseudo_stat >= actual_stat:
            c += 1

        if verbose and i % 10 == 0:
            print('pseudo-statistic: %f' % pseudo_stat)
            print('significance: %f' % (float(c + 1) / (i + 1)))
            print('-' * 60)

    significance = float(c + 1) / (shuffles + 1)

    if verbose:
        print('significance: %f' % significance)
        # betai comes from outside this function and may be falsy when it is
        # unavailable, in which case the extra report is skipped.
        if betai:
            for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
                print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi)))

    return (significance, c, shuffles)
def pearsonr(x, y, dof=None):
    """
    Compute Pearson's correlation coefficient and a p-value for the null
    hypothesis of no correlation.

    The coefficient measures the linear relationship between two datasets
    and lies between -1 and +1: 0 implies no correlation, while -1 or +1
    imply an exact linear relationship. A positive value means y tends to
    increase as x increases; a negative value means y tends to decrease.
    Strictly speaking, the test assumes each dataset is normally
    distributed.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets with a correlation at least as extreme as the one
    observed. It is not entirely reliable, but is probably reasonable for
    datasets larger than 500 or so.

    This is a modified version that lets the caller override the degrees
    of freedom used for the p-value.

    Parameters
    ----------
    x : (N,) array_like
        First dataset.
    y : (N,) array_like
        Second dataset; should have the same length as `x` (not checked).
    dof : int or None, optional
        Degrees of freedom for the p-value. When None, ``len(x) - 2`` is
        used.

    Returns
    -------
    (Pearson's correlation coefficient,
     2-tailed p-value)
        The p-value is 0.0 when the coefficient is exactly +1 or -1.

    References
    ----------
    http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation
    """
    xs = np.asarray(x)
    ys = np.asarray(y)
    n = len(xs)

    # Deviations of each dataset from its own mean.
    x_mean = xs.mean()
    y_mean = ys.mean()
    x_dev = xs - x_mean
    y_dev = ys - y_mean

    # ss() is defined elsewhere -- presumably a sum of squares; confirm.
    numerator = np.add.reduce(x_dev * y_dev)
    denominator = np.sqrt(ss(x_dev) * ss(y_dev))
    r = numerator / denominator

    # Presumably, if abs(r) > 1, it is only a small floating point artifact,
    # so clamp the coefficient back into [-1, 1].
    if r > 1.0:
        r = 1.0
    elif r < -1.0:
        r = -1.0

    if dof is None:
        df = n - 2
    else:
        df = dof

    # A perfect correlation would divide by zero below; its p-value is 0.
    if abs(r) == 1.0:
        return r, 0.0

    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return r, prob
def approxrand(a, b, **kwargs):
    """
    Returns an approximate significance level between two lists of
    independently generated test values.

    Approximate randomization calculates significance by randomly drawing
    from a sample of the possible permutations. At the limit of the number
    of possible permutations, the significance level is exact. The
    approximate significance level is the sample mean number of times the
    statistic of the permutated lists varies from the actual statistic of
    the unpermuted argument lists.

    Recognised keyword arguments: ``shuffles`` (number of random shuffles,
    default 999, capped at the number of possible permutations),
    ``statistic`` (callable applied to each list, default arithmetic mean)
    and ``verbose`` (print progress, default False).

    :return: a tuple containing an approximate significance level, the count
             of the number of times the pseudo-statistic varied from the
             actual statistic, and the number of shuffles. The count is a
             float that starts at 1e-100 rather than 0.
    :rtype: tuple
    :param a: a list of test values
    :type a: list
    :param b: another list of independently generated test values
    :type b: list
    """
    shuffles = kwargs.get("shuffles", 999)
    n_a = len(a)
    total = n_a + len(b)

    # there's no point in trying to shuffle beyond all possible permutations.
    # Only min(shuffles, total!) is needed, so stop multiplying as soon as the
    # running product reaches the requested count instead of building the
    # complete (potentially enormous) factorial.  For total <= 1 the product
    # stays 1, which also covers empty input.
    permutations = 1
    k = 1
    while k < total and permutations < shuffles:
        k += 1
        permutations *= k
    shuffles = min(shuffles, permutations)

    stat = kwargs.get("statistic", lambda lst: sum(lst) / len(lst))
    verbose = kwargs.get("verbose", False)

    if verbose:
        print("shuffles: %d" % shuffles)

    actual_stat = fabs(stat(a) - stat(b))

    if verbose:
        print("actual statistic: %f" % actual_stat)
        print("-" * 60)

    # Starts at a tiny positive value instead of 0 -- presumably so betai()
    # below never receives an exact zero; confirm before changing.
    c = 1e-100
    lst = LazyConcatenation([a, b])
    # A real list is required: shuffle works in place.
    indices = list(range(total))

    for i in range(shuffles):
        if verbose and i % 10 == 0:
            print("shuffle: %d" % i)

        shuffle(indices)

        # The first n_a shuffled positions play the role of `a`, the rest `b`.
        pseudo_stat_a = stat(LazyMap(lambda idx: lst[idx], indices[:n_a]))
        pseudo_stat_b = stat(LazyMap(lambda idx: lst[idx], indices[n_a:]))
        pseudo_stat = fabs(pseudo_stat_a - pseudo_stat_b)

        if pseudo_stat >= actual_stat:
            c += 1

        if verbose and i % 10 == 0:
            print("pseudo-statistic: %f" % pseudo_stat)
            print("significance: %f" % ((c + 1) / (i + 1)))
            print("-" * 60)

    significance = (c + 1) / (shuffles + 1)

    if verbose:
        print("significance: %f" % significance)
        # betai comes from outside this function and may be falsy when it is
        # unavailable, in which case the extra report is skipped.
        if betai:
            for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
                print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi)))

    return (significance, c, shuffles)