def MA_zscore(G, R, window=1./5., padded=False, progressCallback=None):
    """ Return the Z-score of log2 fold ratio estimated from local
    distribution of log2 fold ratio values on the MA-plot.

    For every point, the standard deviation of the ratios inside a window of
    neighbouring points (neighbours in intensity order) is computed and the
    point's ratio is divided by it.

    :param G, R: the two channels; passed unchanged to ``ratio_intensity``,
        which must return ``(ratio, intensity)``.  ``G.shape`` is the shape
        of the result.
    :param window: fraction of all points that forms one local window.
    :param padded: if true, a window that runs past either end of the
        intensity ordering is topped up with the (randomly shuffled) indices
        closest to that end, so that every window has the same number of
        elements.  If false the window is simply truncated.
    :param progressCallback: optional callable; called with the percentage
        of processed points at the progress milestones.
    :return: masked array of Z-scores; entries that are masked or not finite
        (e.g. a zero local standard deviation) are masked.
    """
    import math
    import random

    ratio, intensity = ratio_intensity(G, R)
    z_scores = numpy.ma.zeros(G.shape)
    order = list(numpy.ma.argsort(intensity))  # indices by increasing intensity
    n = len(order)
    r = int(math.ceil(n * window))  # number of window elements

    def local_indices(i):
        """ Indices of the window centred on position i of the ordering. """
        # Floor division keeps the bounds integral under true division too.
        start, end = i - r // 2, i + r // 2 + r % 2
        if not padded:
            return order[max(start, 0):min(end, n)]
        pad_start, pad_end = [], []
        if start < 0:
            pad_start = order[:abs(start)]
            random.shuffle(pad_start)
            start = 0
        if end > n:
            # The last (end - n) indices, mirroring the padding at the start.
            pad_end = order[-(end - n):]
            random.shuffle(pad_end)
            end = n
        return pad_start + order[start:end] + pad_end

    # Milestones are only consulted when a callback was supplied.
    milestones = orngMisc.progressBarMilestones(n) if progressCallback else ()
    for i, ind in enumerate(order):
        localRatio = numpy.take(ratio, local_indices(i))
        local_std = numpy.ma.std(localRatio)
        z_scores[ind] = ratio[ind] / local_std
        if progressCallback and i in milestones:
            progressCallback(100. * i / n)

    # Masked entries are filled with NaN so that they stay masked; boolean
    # arrays must be inverted with ~ (unary minus is rejected by numpy).
    z_scores.mask = ~numpy.isfinite(z_scores.filled(numpy.nan))
    return z_scores
def _tricube_weights(x, centers, r):
    """Return tricube kernel weights of the points x around each centre.

    The result has shape (len(x), len(centers)); column i holds the weights
    of all points of x for the local fit around centers[i].  The bandwidth of
    centre i is the r-th smallest distance from centers[i] to the points of x.
    """
    dist = x[numpy.newaxis, :] - centers[:, numpy.newaxis]
    numpy.abs(dist, dist)
    dist.sort(axis=1)
    # Copy the column: a plain slice is a view that would keep the whole
    # distance matrix alive after the del below.
    h = dist[:, r].copy()
    del dist  # to free memory before the weight matrix is allocated

    # The sort above destroyed the pairing of distances with points, so the
    # differences are recomputed; everything below is done in place.
    w = centers[numpy.newaxis, :] - x[:, numpy.newaxis]
    w /= h
    numpy.abs(w, w)
    numpy.clip(w, 0.0, 1.0, w)
    # w = (1 - w**3)**3
    w **= 3
    w *= -1
    w += 1
    w **= 3
    return w


def _weighted_line_at(x, y, weights, x0):
    """Fit a weighted least-squares line to (x, y) and evaluate it at x0."""
    weights_x = weights * x
    b = numpy.array([numpy.sum(weights * y), numpy.sum(weights * y * x)])
    A = numpy.array([[numpy.sum(weights), numpy.sum(weights_x)],
                     [numpy.sum(weights_x), numpy.sum(weights_x * x)]])
    beta = numpy.linalg.solve(A, b)
    return beta[0] + beta[1] * x0


def lowess2(x, y, xest, f=2./3., iter=3, progressCallback=None):
    """Returns estimated values of y in data points xest.

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot.  The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot.  The
    function returns the estimated (smooth) values of y at the points xest.

    The smoothing span is given by f.  A larger value for f will result in a
    smoother curve.  The number of robustifying iterations is given by iter.
    The function will run faster with a smaller number of iterations.

    x, y and xest are converted to single precision float arrays and the
    result is a single precision array of len(xest) elements.

    progressCallback, if given, is called with the percentage of the work
    done at the progress milestones.

    If a local linear system is singular, numpy.linalg.LinAlgError raised by
    numpy.linalg.solve propagates to the caller.

    Taken from Peter Juvan's numpyExtn.py, modified for numpy, computes
    pairwise distances inplace
    """
    x = numpy.asarray(x, 'f')
    y = numpy.asarray(y, 'f')
    xest = numpy.asarray(xest, 'f')
    n = len(x)
    nest = len(xest)
    # radius: num. of points to take into LR
    r = min(int(numpy.ceil(f * n)), n - 1)

    west = _tricube_weights(x, xest, r)  # shape (n, nest)
    # Weights around the data points themselves are only needed to compute
    # residuals for the robustness iterations.
    w = _tricube_weights(x, x, r) if iter > 1 else None

    yest = numpy.zeros(n, 'f')
    yest2 = numpy.zeros(nest, 'f')
    delta = numpy.ones(n, 'f')

    iter_count = iter * (nest + n) if iter > 1 else nest
    # Milestones are only consulted when a callback was supplied.
    if progressCallback:
        milestones = orngMisc.progressBarMilestones(iter_count)
    else:
        milestones = ()
    curr_iter = 0
    for iteration in range(iter):
        # fit xest
        for i in range(nest):
            yest2[i] = _weighted_line_at(x, y, delta * west[:, i], xest[i])
            if progressCallback and curr_iter in milestones:
                progressCallback(100. * curr_iter / iter_count)
            curr_iter += 1

        # fit x (to calculate residuals and delta)
        if iter > 1:
            for i in range(n):
                yest[i] = _weighted_line_at(x, y, delta * w[:, i], x[i])
                if progressCallback and curr_iter in milestones:
                    progressCallback(100. * curr_iter / iter_count)
                curr_iter += 1
            residuals = y - yest
            s = numpy.median(numpy.abs(residuals))
            if s == 0:
                # At least half of the points are fitted exactly; dividing
                # by 6*s would give 0/0 and turn every estimate into NaN.
                break
            delta = numpy.clip(residuals / (6 * s), -1, 1)
            delta = 1 - delta * delta
            delta = delta * delta
    return yest2
def lowess(x, y, f=2./3., iter=3, progressCallback=None):
    """ Lowess taken from Bio.Statistics.lowess, modified to compute pairwise
    distances inplace.

    lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot.  The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot.  The
    function returns the estimated (smooth) values of y.

    The smoothing span is given by f.  A larger value for f will result in a
    smoother curve.  The number of robustifying iterations is given by iter.
    The function will run faster with a smaller number of iterations.

    progressCallback, if given, is called with the percentage of the work
    done at the progress milestones.

    x and y should be numpy float arrays of equal length.  The return value
    is also a numpy float array of that length.

    e.g.
    >>> import numpy
    >>> x = numpy.array([4,  4,  7,  7,  8,  9, 10, 10, 10, 11, 11, 12, 12, 12,
    ...                 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
    ...                 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
    ...                 20, 22, 23, 24, 24, 24, 24, 25], float)
    >>> y = numpy.array([2, 10,  4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
    ...                 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
    ...                 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
    ...                 64, 66, 54, 70, 92, 93, 120, 85], float)
    >>> result = lowess(x, y)
    >>> len(result)
    50
    >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1]))
    [4.85, ..., 84.98]
    """
    n = len(x)
    r = min(int(numpy.ceil(f * n)), n - 1)

    # h[i]: distance from x[i] to its r-th nearest point
    dist = [x] - numpy.transpose([x])
    dist = numpy.abs(dist, dist)
    dist.sort(axis=1)
    # Copy the column: a plain slice is a view that would keep the whole
    # distance matrix alive after the del below.
    h = dist[:, r].copy()
    del dist

    # w = (1 - clip(abs((x[j] - x[i]) / h), 0, 1)**3)**3, computed in place;
    # column i holds the weights for the local fit around x[i].
    w = [x] - numpy.transpose([x])
    w /= h
    w = numpy.abs(w, w)
    w = numpy.clip(w, 0.0, 1.0, w)
    w **= 3
    w *= -1
    w += 1
    w **= 3

    yest = numpy.zeros(n)
    delta = numpy.ones(n)
    total = iter * n
    # Milestones are only consulted when a callback was supplied.
    if progressCallback:
        milestones = orngMisc.progressBarMilestones(total)
    else:
        milestones = ()
    for iteration in range(iter):
        for i in range(n):
            weights = delta * w[:, i]
            weights_mul_x = weights * x
            # Weighted least-squares line, solved by Cramer's rule.
            b1 = numpy.ma.dot(weights, y)
            b2 = numpy.ma.dot(weights_mul_x, y)
            A11 = sum(weights)
            A12 = sum(weights_mul_x)
            A21 = A12
            A22 = numpy.ma.dot(weights_mul_x, x)
            determinant = A11 * A22 - A12 * A21
            beta1 = (A22 * b1 - A12 * b2) / determinant
            beta2 = (A11 * b2 - A21 * b1) / determinant
            yest[i] = beta1 + beta2 * x[i]
            step = iteration * n + i
            if progressCallback and step in milestones:
                # percentage of all iter*n local fits done so far
                progressCallback(100. * step / total)
        residuals = y - yest
        s = median(abs(residuals))
        if s == 0:
            # At least half of the points are fitted exactly; dividing by
            # 6*s would give 0/0 and turn every estimate into NaN.
            break
        delta[:] = numpy.clip(residuals / (6 * s), -1, 1)
        delta[:] = 1 - delta * delta
        delta[:] = delta * delta
    return yest