def _SP(xdata, mx, ydata, my):
    """Return the sum of products of deviations of paired x and y data.

    Helper used when calculating covariance directly: each pair (x, y)
    contributes the term (x - mx)*(y - my).

    If ``mx`` (respectively ``my``) is None, the corresponding data is
    first passed through ``as_sequence`` and its mean is taken with
    ``stats.mean``, so that data is walked twice (a two pass algorithm).
    Otherwise the supplied value is used as-is and the data is only
    iterated once.
    """
    if mx is None:
        # First pass over the x values: find their mean.
        xdata = as_sequence(xdata)
        mx = stats.mean(xdata)
    if my is None:
        # First pass over the y values: find their mean.
        ydata = as_sequence(ydata)
        my = stats.mean(ydata)

    def product_of_deviations(pair):
        # pair is one (x, y) item produced by zip() below.
        x, y = pair
        return (x - mx)*(y - my)

    # NOTE(review): presumably _generalised_sum applies the function to
    # every item and adds up the results -- confirm against its definition.
    return _generalised_sum(zip(xdata, ydata), product_of_deviations)
def split(xdata, ydata=None):
    """Split bivariate data into a pair ``(xdata, ydata)``.

    Two calling conventions are supported:

    split(xdata, ydata)
        Both arguments are converted with ``as_sequence`` and the longer
        one is trimmed to the length of the shorter.

    split(xydata)
        The single iterable is either ``[(x0, y0), (x1, y1), ...]`` or
        plain ``[x0, x1, ...]``. The first item decides which: if it has
        no ``len()`` the data is treated as x values only and every y is
        reported as None. An empty iterable gives ``([], [])``.

    Raises TypeError if the first item has a length other than 2.
    """
    if ydata is not None:
        # Two separate data sets: only the lengths need reconciling.
        xs = as_sequence(xdata)
        ys = as_sequence(ydata)
        nx, ny = len(xs), len(ys)
        if nx < ny:
            ys = ys[:nx]
        elif nx > ny:
            xs = xs[:ny]
        assert len(xs) == len(ys)
        return (xs, ys)

    # Single argument: peek at the first item and treat it as canonical.
    points = iter(xdata)
    missing = object()
    first = next(points, missing)
    if first is missing:
        # Nothing to split.
        return ([], [])

    try:
        width = len(first)
    except TypeError:
        # Unsized first item, so this is [x0, x1, x2, ...]. Not really
        # multivariate, but supported; the caller decides what to do
        # with the placeholder y values.
        xs = [first] + list(points)
        return (xs, [None]*len(xs))

    # Otherwise this should be [(x0, y0), (x1, y1), ...].
    if width != 2:
        raise TypeError('expecting 2-tuple (x, y) but got %d-tuple' % width)

    # Index the first point before consuming the rest, so any failure on
    # it surfaces first.
    x0, y0 = first[0], first[1]
    remainder = [(px, py) for px, py in points]
    xs = [x0] + [px for px, _ in remainder]
    ys = [y0] + [py for _, py in remainder]
    assert len(xs) == len(ys)
    return (xs, ys)