def test_pca_fit_transform_fp32(nrows, ncols, n_parts, client=None):
    """Smoke-test MNMG PCA ``fit_transform`` on float32 blob data.

    Spins up a one-thread-per-worker ``LocalCUDACluster`` when no client
    is supplied. Cleanup now runs in a ``finally`` block so an exception
    during data generation or the fit cannot leak the owned cluster.
    """
    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts,
                               cluster_std=1.5, verbose=False,
                               random_state=10, dtype=np.float32)
        # Make sure the distributed blobs are materialized before fitting.
        wait(X_cudf)

        cupca = daskPCA(n_components=20, whiten=True)
        cupca.fit_transform(X_cudf)
    finally:
        # Only tear down resources this test created itself.
        if owns_cluster:
            client.close()
            cluster.close()
def test_pca_fit(nrows, ncols, n_parts, input_type, cluster):
    """Compare MNMG PCA attributes against scikit-learn's full-SVD PCA.

    Fits cuml's dask PCA on distributed blobs — as a dask-cudf dataframe
    or a dask array depending on ``input_type`` — then checks the learned
    attributes against a single-process sklearn fit on the same data
    gathered to host memory.
    """
    client = Client(cluster)
    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA
        from cuml.dask.datasets import make_blobs

        X, _ = make_blobs(n_samples=nrows,
                          n_features=ncols,
                          centers=1,
                          n_parts=n_parts,
                          cluster_std=0.5,
                          random_state=10,
                          dtype=np.float32)
        wait(X)

        if input_type == "dataframe":
            X_train = to_dask_cudf(X)
            X_cpu = X_train.compute().to_pandas().values
        elif input_type == "array":
            X_train = X
            X_cpu = cp.asnumpy(X_train.compute())
        else:
            # Previously an unknown input_type fell through and left
            # X_train/X_cpu undefined, producing a confusing NameError.
            raise ValueError("unsupported input_type: %s" % input_type)

        # Let fit() failures propagate so the test fails at the real error
        # instead of printing it and crashing later on missing attributes.
        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_train)

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X_cpu)

        from cuml.test.utils import array_equal

        all_attr = ['singular_values_', 'components_',
                    'explained_variance_', 'explained_variance_ratio_']

        for attr in all_attr:
            # Principal components are only defined up to a sign flip.
            with_sign = attr not in ('components_',)
            cuml_res = getattr(cupca, attr)
            if isinstance(cuml_res, np.ndarray):
                # NOTE(review): np.ndarray has no as_matrix(); this branch
                # looks stale/unreachable -- confirm before relying on it.
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-1,
                               with_sign=with_sign)
    finally:
        client.close()
def test_pca_fit(nrows, ncols, n_parts, client=None):
    """Compare MNMG PCA attributes against sklearn on blob data.

    Creates (and owns) a ``LocalCUDACluster`` when no client is given.
    Cleanup is moved into a ``finally`` block so an exception during the
    fit cannot leak the cluster, and it now runs *after* the assertion
    loop rather than before it.
    """
    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA
        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts,
                               cluster_std=0.5, verbose=False,
                               random_state=10, dtype=np.float32)
        wait(X_cudf)

        # Gather to host for the single-process sklearn reference fit.
        X = X_cudf.compute().to_pandas().values

        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_cudf)

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X)

        from cuml.test.utils import array_equal

        all_attr = ['singular_values_', 'components_',
                    'explained_variance_', 'explained_variance_ratio_']

        for attr in all_attr:
            # Principal components are only defined up to a sign flip.
            with_sign = attr not in ('components_',)
            cuml_res = getattr(cupca, attr)
            if isinstance(cuml_res, np.ndarray):
                # NOTE(review): np.ndarray has no as_matrix(); this branch
                # looks stale/unreachable -- confirm before relying on it.
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-3,
                               with_sign=with_sign)
    finally:
        if owns_cluster:
            client.close()
            cluster.close()
def test_pca_fit_transform_fp64(nrows, ncols, n_parts, client):
    """Shape smoke-test for MNMG PCA ``fit_transform`` on float64 blobs."""
    from cuml.dask.datasets import make_blobs
    from cuml.dask.decomposition import PCA as daskPCA

    n_components = 30
    X_cudf, _ = make_blobs(n_samples=nrows,
                           n_features=ncols,
                           centers=1,
                           n_parts=n_parts,
                           cluster_std=1.5,
                           random_state=10,
                           dtype=np.float64)

    pca = daskPCA(n_components=n_components, whiten=False)
    transformed = pca.fit_transform(X_cudf).compute()

    # Every input row is kept; columns are reduced to n_components.
    assert transformed.shape[0] == nrows
    assert transformed.shape[1] == n_components
def test_pca_fit_transform_fp32_noncomponents(nrows, ncols, n_parts, client):
    """fit_transform smoke-test when n_components is not passed (MG path)."""
    from cuml.dask.datasets import make_blobs
    from cuml.dask.decomposition import PCA as daskPCA

    X_cudf, _ = make_blobs(n_samples=nrows,
                           n_features=ncols,
                           centers=1,
                           n_parts=n_parts,
                           cluster_std=1.5,
                           random_state=10,
                           dtype=np.float32)

    transformed = daskPCA(whiten=False).fit_transform(X_cudf).compute()

    # With n_components unset all components are kept; the hard-coded 20
    # presumably matches the parametrized ncols -- confirm vs. fixture.
    assert transformed.shape[0] == nrows
    assert transformed.shape[1] == 20
def test_pca_fit(nrows, ncols, n_parts, cluster):
    """Compare MNMG PCA attributes (cudf input) against sklearn's PCA."""
    client = Client(cluster)
    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA
        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts,
                               cluster_std=0.5, verbose=False,
                               random_state=10, dtype=np.float32)
        wait(X_cudf)

        # Debug aid: show a few rows of the generated distributed frame.
        print(str(X_cudf.head(3)))

        # Let fit() failures propagate so the test fails at the real error
        # instead of printing it and crashing later on missing attributes.
        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_cudf)

        # Gather to host for the single-process sklearn reference fit.
        X = X_cudf.compute().to_pandas().values

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X)

        from cuml.test.utils import array_equal

        all_attr = ['singular_values_', 'components_',
                    'explained_variance_', 'explained_variance_ratio_']

        for attr in all_attr:
            # Principal components are only defined up to a sign flip.
            with_sign = attr not in ('components_',)
            cuml_res = getattr(cupca, attr)
            if isinstance(cuml_res, np.ndarray):
                # NOTE(review): np.ndarray has no as_matrix(); this branch
                # looks stale/unreachable -- confirm before relying on it.
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-3,
                               with_sign=with_sign)
    finally:
        client.close()
def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster):
    """fit_transform smoke-test for MNMG PCA on float32 blob data."""
    client = Client(cluster)
    try:
        from cuml.dask.datasets import make_blobs
        from cuml.dask.decomposition import PCA as daskPCA

        X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts,
                               cluster_std=1.5,
                               verbose=False,
                               random_state=10,
                               dtype=np.float32)
        # Ensure the distributed blobs exist before fitting.
        wait(X_cudf)

        pca = daskPCA(n_components=20, whiten=True)
        pca.fit_transform(X_cudf)
    finally:
        client.close()
def test_pca_fit_transform_fp64(nrows, ncols, n_parts, cluster):
    """fit_transform smoke-test for MNMG PCA on float64 blob data."""
    client = Client(cluster)
    try:
        from cuml.dask.datasets import make_blobs
        from cuml.dask.decomposition import PCA as daskPCA

        X_cudf, _ = make_blobs(n_samples=nrows,
                               n_features=ncols,
                               centers=1,
                               n_parts=n_parts,
                               cluster_std=1.5,
                               random_state=10,
                               dtype=np.float64)
        # Ensure the distributed blobs exist before fitting.
        wait(X_cudf)

        pca = daskPCA(n_components=30, whiten=False)
        pca.fit_transform(X_cudf)
    finally:
        client.close()