def test_shaping_3_values(eng):
    """Check that ``SVD`` returns matching factors for differently shaped inputs.

    The factors computed from the raw ``.values`` of a series serve as the
    baseline; the same data wrapped as a series, as a reshaped series and as
    images must reproduce them (after reshaping the affected factor back).
    """

    def svd(x):
        return SVD(k=3, method='direct', seed=0).fit(x)

    # baseline: ndarray (local) or BoltArray (spark)
    x = make_low_rank_matrix(n_samples=10, n_features=10, random_state=0)
    x = series.fromarray(x, engine=eng).values
    u, s, v = svd(x)

    def check_against_baseline(u_other, s_other, v_other):
        # every factor is compared to the baseline triple computed above
        assert allclose(u, u_other)
        assert allclose(s, s_other)
        assert allclose(v, v_other)

    # simple series
    u1, s1, v1 = svd(series.fromarray(x))
    check_against_baseline(u1, s1, v1)

    # series with multiple dimensions: u comes back multi-dimensional and is
    # flattened to (10, 3) before the comparison
    u1, s1, v1 = svd(series.fromarray(x.reshape(2, 5, 10)))
    check_against_baseline(u1.reshape(10, 3), s1, v1)

    # images (must have multiple dimensions): v is flattened to (3, 10)
    u1, s1, v1 = svd(images.fromarray(x.reshape(10, 2, 5)))
    check_against_baseline(u1, s1, v1.reshape(3, 10))
def test_shaping_3_values(eng):
    """Verify that ``SVD`` output does not depend on how the input is wrapped.

    Compares the three factors obtained from raw values with those obtained
    from a series, a multi-dimensional series and an images object.
    """

    def svd(x):
        model = SVD(k=3, method='direct', seed=0)
        return model.fit(x)

    # baseline: ndarray (local) or BoltArray (spark)
    matrix = make_low_rank_matrix(n_samples=10, n_features=10, random_state=0)
    x = series.fromarray(matrix, engine=eng).values
    u, s, v = svd(x)

    # simple series
    as_series = series.fromarray(x)
    u1, s1, v1 = svd(as_series)
    assert allclose(u, u1)
    assert allclose(s, s1)
    assert allclose(v, v1)

    # series with multiple dimensions
    as_series_nd = series.fromarray(x.reshape(2, 5, 10))
    u1, s1, v1 = svd(as_series_nd)
    u1 = u1.reshape(10, 3)  # bring u back to the baseline's 2-d layout
    assert allclose(u, u1)
    assert allclose(s, s1)
    assert allclose(v, v1)

    # images (must have multiple dimensions)
    as_images = images.fromarray(x.reshape(10, 2, 5))
    u1, s1, v1 = svd(as_images)
    v1 = v1.reshape(3, 10)  # bring v back to the baseline's 2-d layout
    assert allclose(u, u1)
    assert allclose(s, s1)
    assert allclose(v, v1)
def test_shaping_2_values(eng):
    """Check that ``PCA`` returns matching outputs for differently shaped inputs.

    The pair computed from the raw ``.values`` of a series is the baseline;
    a series, a reshaped series and an images object built from the same data
    must reproduce it (after reshaping the affected output back).
    """

    def pca(x):
        return PCA(k=3, svd_method='direct', seed=0).fit(x)

    # baseline: ndarray (local) or BoltArray (spark)
    x = make_low_rank_matrix(n_samples=10, n_features=10, random_state=0)
    x = series.fromarray(x, engine=eng).values
    t, w = pca(x)

    def check_against_baseline(t_other, w_other):
        # both outputs are compared to the baseline pair computed above
        assert allclose(t, t_other)
        assert allclose(w, w_other)

    # simple series
    t1, w1 = pca(series.fromarray(x))
    check_against_baseline(t1, w1)

    # series with multiple dimensions: t is flattened to (10, 3) first
    t1, w1 = pca(series.fromarray(x.reshape(2, 5, 10)))
    check_against_baseline(t1.reshape(10, 3), w1)

    # images (must have multiple dimensions): w is flattened to (3, 10) first
    t1, w1 = pca(images.fromarray(x.reshape(10, 2, 5)))
    check_against_baseline(t1, w1.reshape(3, 10))
def test_ica(eng):
    """Compare ``ICA`` with scikit-learn's ``FastICA`` on a noisy 3-channel mixture.

    Both results go through ``normalize_ICA`` and are then compared with
    ``allclose_sign_permute`` (defined elsewhere) at ``atol=1e-1``.
    """
    # two signals plus their sum, stacked as columns
    ts = linspace(0, 10, 100)
    first = sin(ts)
    second = square(sin(2*ts))
    mixed = c_[first, second, first + second]

    # seeded, small additive noise
    random.seed(0)
    mixed += 0.001*random.randn(*mixed.shape)
    x = fromarray(mixed, engine=eng)

    def normalize_ICA(s, aT):
        # scale the sources by the column sums of the mixing matrix and
        # divide the same sums out of the mixing matrix
        mixing = aT.T
        col_sums = mixing.sum(axis=0)
        return s*col_sums, (mixing/col_sums).T

    # reference result from scikit-learn
    from sklearn.decomposition import FastICA
    reference = FastICA(n_components=2, fun='cube', random_state=0)
    ref_sources = reference.fit_transform(x.toarray())
    ref_mixing_T = reference.mixing_.T
    ref_sources, ref_mixing_T = normalize_ICA(ref_sources, ref_mixing_T)

    # result from the implementation under test
    model = ICA(k=2, svd_method='direct', max_iter=200, seed=0)
    est_sources, est_mixing_T = model.fit(x)
    est_sources, est_mixing_T = normalize_ICA(est_sources, est_mixing_T)

    tol = 1e-1
    assert allclose_sign_permute(ref_sources, est_sources, atol=tol)
    assert allclose_sign_permute(ref_mixing_T, est_mixing_T, atol=tol)
def test_predict(eng):
    """Check ``LinearRegression.predict`` against ``predict_models(LR, X, y)``."""
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    responses = randn(10, 4).T
    y = fromarray(responses, engine=eng)

    # reference predictions come from the helper defined elsewhere
    expected = asarray(predict_models(LR, X, y))

    fitted = LinearRegression().fit(X, y)
    actual = fitted.predict(X).toarray()

    assert allclose(expected, actual)
def test_score(eng):
    """Check ``LinearRegression.score`` against ``score_models(LR, X, y)``."""
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    responses = randn(10, 4).T
    y = fromarray(responses, engine=eng)

    # reference scores come from the helper defined elsewhere
    expected = asarray(score_models(LR, X, y))

    fitted = LinearRegression().fit(X, y)
    actual = fitted.score(X, y).toarray()

    assert allclose(expected, actual)
def test_ica(eng):
    """Check ``ICA`` against scikit-learn's ``FastICA`` on a synthetic mixture.

    The two factorizations are normalized with ``normalize_ICA`` and compared
    through ``allclose_sign_permute`` (defined elsewhere) with ``atol=1e-1``.
    """

    def normalize_ICA(s, aT):
        # move the column sums of the mixing matrix onto the sources
        a = aT.T
        scale = a.sum(axis=0)
        rescaled_sources = s * scale
        rescaled_mixing_T = (a / scale).T
        return rescaled_sources, rescaled_mixing_T

    # three observed channels: two signals and their sum
    grid = linspace(0, 10, 100)
    sig_a = sin(grid)
    sig_b = square(sin(2 * grid))
    observed = c_[sig_a, sig_b, sig_a + sig_b]

    # seeded, small additive noise
    random.seed(0)
    observed += 0.001 * random.randn(*observed.shape)
    x = fromarray(observed, engine=eng)

    # scikit-learn reference
    from sklearn.decomposition import FastICA
    sk_ica = FastICA(n_components=2, fun='cube', random_state=0)
    s_ref = sk_ica.fit_transform(x.toarray())
    aT_ref = sk_ica.mixing_.T
    s_ref, aT_ref = normalize_ICA(s_ref, aT_ref)

    # implementation under test
    s_est, aT_est = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x)
    s_est, aT_est = normalize_ICA(s_est, aT_est)

    tol = 1e-1
    assert allclose_sign_permute(s_ref, s_est, atol=tol)
    assert allclose_sign_permute(aT_ref, aT_est, atol=tol)
def toseries(y):
    """Convert ``y`` via ``fromarray``/``fromrdd`` when it is a raw array type.

    An object whose type is exactly ``ndarray`` is passed to ``fromarray``;
    one whose type is exactly ``BoltArraySpark`` is passed to ``fromrdd``
    through its ``tordd()``. Anything else, including subclasses of those
    two types, is returned unchanged.
    """
    kind = type(y)
    # exact type checks on purpose: subclasses fall through untouched
    if kind is ndarray:
        return fromarray(y)
    if kind is BoltArraySpark:
        return fromrdd(y.tordd())
    return y
def test_betas_and_scores(eng):
    """Check ``betas_and_scores`` against the reference betas and scores side by side."""
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    responses = randn(10, 4).T
    y = fromarray(responses, engine=eng)

    # reference values from the helpers defined elsewhere
    ref_betas = asarray(fit_models(LR, X, y))
    ref_scores = asarray(score_models(LR, X, y))

    # scores are appended to the betas as one extra trailing column
    score_column = ref_scores[:, newaxis]
    expected = hstack([ref_betas, score_column])

    fitted = LinearRegression().fit(X, y)
    actual = fitted.betas_and_scores.toarray()

    assert allclose(expected, actual)
def test_fast_linear(eng):
    """Check ``FastLinearRegression`` betas against ``fit_models(LR, ...)``.

    Runs once with default options and once with ``fit_intercept=False``,
    passing the same options to the reference helper and to the model.
    """
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    y = fromarray(randn(10, 4).T, engine=eng)

    # defaults first, then the no-intercept variant
    for options in ({}, dict(fit_intercept=False)):
        expected = asarray(fit_models(LR, X, y, **options))
        fitted = FastLinearRegression(**options).fit(X, y)
        actual = fitted.betas.toarray()
        assert allclose(expected, actual)
def test_custom(eng):
    """Check ``CustomRegression`` wrapping ``Ridge`` against ``fit_models(Ridge, ...)``.

    Runs once with a default ``Ridge()`` and once with explicit keyword
    options, passing the same options to the reference helper and to ``Ridge``.
    """
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    y = fromarray(randn(10, 4).T, engine=eng)

    # NOTE(review): newer scikit-learn releases may no longer accept
    # ``normalize`` on Ridge; confirm against the pinned version
    option_sets = ({}, {"fit_intercept": False, "normalize": True})

    for kwargs in option_sets:
        expected = asarray(fit_models(Ridge, X, y, **kwargs))
        fitted = CustomRegression(Ridge(**kwargs)).fit(X, y)
        actual = fitted.betas.toarray()
        assert allclose(expected, actual)
def test_predict_and_score(eng):
    """Check ``predict_and_score`` equals ``predict`` and ``score`` stacked together."""
    # 10 x 2 random regressors; responses are a 10 x 4 draw, transposed
    X = randn(10, 2)
    y = fromarray(randn(10, 4).T, engine=eng)

    fitted = LinearRegression().fit(X, y)

    # the two pieces, computed separately on the same fitted model
    predicted = fitted.predict(X).toarray()
    scored = fitted.score(X, y).toarray()

    # scores are appended to the predictions as one extra trailing column
    expected = hstack([predicted, scored[:, newaxis]])
    actual = fitted.predict_and_score(X, y).toarray()

    assert allclose(expected, actual)
def test_pca(eng):
    """Compare ``PCA`` (``direct`` and ``em``) with scikit-learn's PCA.

    The ``direct`` result is compared with ``allclose_sign`` at its default
    tolerance, the ``em`` result with ``atol=1e-1``.
    """
    data = make_low_rank_matrix(n_samples=10, n_features=5, random_state=0)
    x = fromarray(data, engine=eng)

    # scikit-learn reference: transformed data and components
    from sklearn.decomposition import PCA as skPCA
    reference = skPCA(n_components=2)
    ref_scores = reference.fit_transform(x.toarray())
    ref_components = reference.components_

    # direct method
    scores, components = PCA(k=2, svd_method='direct').fit(x)
    assert allclose_sign(ref_components.T, components.T)
    assert allclose_sign(ref_scores, scores)

    # em method, compared with an explicit absolute tolerance
    em_model = PCA(k=2, svd_method='em', max_iter=100, seed=0)
    scores, components = em_model.fit(x)
    tol = 1e-1
    assert allclose_sign(ref_components.T, components.T, atol=tol)
    assert allclose_sign(ref_scores, scores, atol=tol)
def test_svd(eng):
    """Compare ``SVD`` (``direct`` and ``em``) with scikit-learn's ``randomized_svd``.

    The ``direct`` result is compared at the default tolerance, the ``em``
    result with ``atol=1e-1``. Singular values use ``allclose``; the other two
    factors use ``allclose_sign`` (defined elsewhere).
    """
    data = make_low_rank_matrix(n_samples=10, n_features=5, random_state=0)
    x = fromarray(data, engine=eng)

    # scikit-learn reference factors
    from sklearn.utils.extmath import randomized_svd
    u_ref, s_ref, v_ref = randomized_svd(
        x.toarray(), n_components=2, random_state=0
    )

    # direct method
    u_est, s_est, v_est = SVD(k=2, method='direct').fit(x)
    assert allclose_sign(u_ref, u_est)
    assert allclose(s_ref, s_est)
    assert allclose_sign(v_ref.T, v_est.T)

    # em method, compared with an explicit absolute tolerance
    em_model = SVD(k=2, method='em', max_iter=100, seed=0)
    u_est, s_est, v_est = em_model.fit(x)
    tol = 1e-1
    assert allclose_sign(u_ref, u_est, atol=tol)
    assert allclose(s_ref, s_est, atol=tol)
    assert allclose_sign(v_ref.T, v_est.T, atol=tol)
def test_nmf(eng):
    """Compare the reconstruction from ``NMF`` with scikit-learn's NMF.

    Only the products ``w . h`` are compared (``atol=1e-1``), not the
    individual factors.
    """
    # two signals stacked as the rows of h
    ts = linspace(0, 10, 100)
    first = 1 + absolute(sin(ts))
    second = 1 + square(cos(2*ts))
    h = c_[first, second].T

    # fixed 3 x 2 weights: each signal alone, then both together
    w = array([[1, 0], [0, 1], [1, 1]])
    x = fromarray(dot(w, h), engine=eng)

    # scikit-learn reference reconstruction
    from sklearn.decomposition import NMF as skNMF
    reference = skNMF(n_components=2, random_state=0)
    w_ref = reference.fit_transform(x.toarray())
    h_ref = reference.components_
    recon_ref = dot(w_ref, h_ref)

    # reconstruction from the implementation under test
    w_est, h_est = NMF(k=2, seed=0).fit(x)
    recon_est = dot(w_est, h_est)

    tol = 1e-1
    assert allclose(recon_ref, recon_est, atol=tol)
def test_nmf(eng):
    """Check that ``NMF`` reconstructs the data as well as scikit-learn's NMF.

    The test builds a 3-row matrix from two signals, factorizes it with both
    implementations and compares the two reconstructions at ``atol=1e-1``.
    """
    grid = linspace(0, 10, 100)
    sig_a = 1 + absolute(sin(grid))
    sig_b = 1 + square(cos(2 * grid))

    # signals as rows, combined through fixed 3 x 2 weights
    signals = c_[sig_a, sig_b].T
    weights = array([[1, 0], [0, 1], [1, 1]])
    product = dot(weights, signals)
    x = fromarray(product, engine=eng)

    # scikit-learn reference
    from sklearn.decomposition import NMF as skNMF
    sk_nmf = skNMF(n_components=2, random_state=0)
    w1 = sk_nmf.fit_transform(x.toarray())
    h1 = sk_nmf.components_
    xhat1 = dot(w1, h1)

    # implementation under test
    model = NMF(k=2, seed=0)
    w2, h2 = model.fit(x)
    xhat2 = dot(w2, h2)

    tol = 1e-1
    assert allclose(xhat1, xhat2, atol=tol)