def test_svd_direct(self):
    """Compare the rank-1 "direct" SVD with ``LinAlg.svd`` on a 4x3 matrix.

    The leading singular value must match, and the leading left/right
    singular vectors must match up to a global sign flip.
    """
    data_local = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]
    # Each row is keyed by an integer 1..4 before being distributed.
    data = self.sc.parallelize(zip(range(1, 5), data_local))

    svd = SVD(k=1, method="direct")
    svd.calc(data)

    u_true, s_true, v_true = LinAlg.svd(array(data_local))

    # Drop the keys, stack the values, and take the first component.
    # (Indexing instead of a tuple-parameter lambda keeps this valid on
    # Python 3 as well as Python 2.)
    u_test = transpose(array(svd.u.map(lambda kv: kv[1]).collect()))[0]
    v_test = svd.v[0]

    assert allclose(svd.s[0], s_true[0])
    # Singular vectors are only defined up to sign, so accept either one.
    assert allclose(v_test, v_true[0, :]) or allclose(-v_test, v_true[0, :])
    assert allclose(u_test, u_true[:, 0]) or allclose(-u_test, u_true[:, 0])
def test_svd_em(self):
    """Compare the rank-1 "em" SVD with ``LinAlg.svd`` on a 4x3 matrix.

    The leading singular value and the leading left/right singular vectors
    (up to a global sign flip) must agree within an absolute tolerance.
    """
    data_local = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]
    # Each row is keyed by an integer 1..4 before being distributed.
    data = self.sc.parallelize(zip(range(1, 5), data_local))

    svd = SVD(k=1, method="em")
    svd.calc(data)

    u_true, s_true, v_true = LinAlg.svd(array(data_local))

    # Drop the keys, stack the values, and take the first component.
    # (Indexing instead of a tuple-parameter lambda keeps this valid on
    # Python 3 as well as Python 2.)
    u_test = transpose(array(svd.u.map(lambda kv: kv[1]).collect()))[0]
    v_test = svd.v[0]

    tol = 1e-3  # allow small error for iterative method

    assert allclose(svd.s[0], s_true[0], atol=tol)
    # Singular vectors are only defined up to sign, so accept either one.
    assert (allclose(v_test, v_true[0, :], atol=tol)
            or allclose(-v_test, v_true[0, :], atol=tol))
    assert (allclose(u_test, u_true[:, 0], atol=tol)
            or allclose(-u_test, u_true[:, 0], atol=tol))
def fit(self, data):
    """Estimate principal components

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, or RowMatrix

    Returns
    -------
    self : the same instance, with ``scores``, ``latent`` and ``comps``
        set from the ``u``, ``s`` and ``v`` attributes of the SVD.
    """
    # isinstance (rather than an exact type comparison) lets RowMatrix
    # subclasses pass through without being wrapped a second time.
    if not isinstance(data, RowMatrix):
        data = RowMatrix(data)

    # NOTE(review): the return value of center() is discarded, so this
    # relies on center() modifying the matrix in place -- confirm against
    # RowMatrix.center.
    data.center(0)

    svd = SVD(k=self.k, method=self.svdmethod)
    svd.calc(data)

    self.scores = svd.u
    self.latent = svd.s
    self.comps = svd.v

    return self
def fit(self, data):
    """
    Fit independent components using an iterative fixed-point algorithm

    Parameters
    ----------
    data: RDD of (tuple, array) pairs, or RowMatrix
        Data to estimate independent components from

    Returns
    ----------
    self : returns an instance of self, with the un-mixing matrix ``w``,
        the mixing matrix ``a`` and the component signals ``sigs`` set.

    Raises
    ----------
    ValueError
        If the number of independent components ``c`` exceeds the number
        of principal components ``k``, or ``k`` exceeds the data
        dimensionality.
    """
    # data dimensionality is the length of the first record's value
    d = len(data.first()[1])

    # default to keeping every dimension
    if self.k is None:
        self.k = d

    if self.c > self.k:
        raise ValueError("number of independent comps " + str(self.c) +
                         " must be less than the number of principal comps " + str(self.k))

    if self.k > d:
        raise ValueError("number of principal comps " + str(self.k) +
                         " must be less than the data dimensionality " + str(d))

    if not isinstance(data, RowMatrix):
        data = RowMatrix(data)

    # reduce dimensionality
    svd = SVD(k=self.k, method=self.svdmethod).calc(data)

    # whiten data
    scale = diag(svd.s / sqrt(data.nrows))
    whtmat = real(dot(inv(scale), svd.v))
    unwhtmat = real(dot(transpose(svd.v), scale))
    wht = data.times(whtmat.T)

    # do multiple independent component extraction
    if self.seed != 0:
        random.seed(self.seed)
    b = orth(random.randn(self.k, self.c))
    b_old = zeros((self.k, self.c))
    niter = 0
    minabscos = 0

    # stop at maxiter, or once every column of b is aligned (|cos| within
    # tol of 1) with the same column from the previous iteration
    while niter < self.maxiter and (1 - minabscos) > self.tol:
        niter += 1
        # update rule for pow3 non-linearity (TODO: add others)
        # the right-hand side is fully evaluated with the previous b
        # before the name is rebound
        b = wht.rows().map(lambda x: outer(x, dot(x, b) ** 3)).sum() / wht.nrows - 3 * b
        # make orthogonal
        b = dot(b, real(sqrtm(inv(dot(transpose(b), b)))))
        # evaluate error
        minabscos = min(abs(diag(dot(transpose(b), b_old))))
        # store results
        b_old = b

    # get un-mixing matrix
    w = dot(b.T, whtmat)

    # get mixing matrix
    a = dot(unwhtmat, b)

    # get components
    sigs = data.times(w.T).rdd

    self.w = w
    self.a = a
    self.sigs = sigs

    return self
def runtest(self):
    """Run a 3-component SVD with the "direct" method on ``self.rdd``."""
    solver = SVD(3, method="direct")
    # The result is not used here; the call is made only for its execution.
    solver.calc(self.rdd)