def test_lasso_default(dtype, nrows, column_info, n_parts, cluster):
    """Fit a default-configured ``Lasso`` on generated data and check R^2.

    The arguments are presumably supplied by pytest fixtures and
    parametrization defined outside this block.

    Parameters
    ----------
    dtype
        Forwarded to ``make_regression`` as ``dtype``.
    nrows
        Number of samples to generate.
    column_info
        Two-item iterable ``(ncols, n_info)``: feature count and
        informative-feature count.
    n_parts
        Forwarded to ``make_regression`` as ``n_parts`` so the requested
        partitioning is actually exercised.
    cluster
        Passed to ``Client(...)`` to open the connection used by the test.
    """
    # Unpack first so a malformed ``column_info`` cannot leave a client open.
    ncols, n_info = column_info
    client = Client(cluster)

    try:
        X, y = make_regression(
            n_samples=nrows,
            n_features=ncols,
            n_informative=n_info,
            n_parts=n_parts,
            client=client,
            dtype=dtype,
        )

        # Block until the generated features are materialized before fitting.
        wait(X)

        lasso = Lasso(client=client)
        lasso.fit(X, y)
        y_hat = lasso.predict(X)

        assert r2_score(y.compute(), y_hat.compute()) >= 0.99
    finally:
        # Always release the client, even when fitting or the assertion fails.
        client.close()
def test_elastic_net(dtype, alpha, algorithm, nrows, column_info, n_parts,
                     client, delayed):
    """Fit ``ElasticNet`` with explicit hyperparameters and check R^2.

    ``column_info`` unpacks to ``(feature count, informative count)``.
    ``delayed`` is forwarded to ``predict``; ``algorithm`` is forwarded as
    the estimator's ``selection`` argument.
    """
    n_features, n_informative = column_info

    data, target = make_regression(
        n_samples=nrows,
        n_features=n_features,
        n_informative=n_informative,
        n_parts=n_parts,
        client=client,
        dtype=dtype,
    )

    model = ElasticNet(
        alpha=np.array([alpha]),
        fit_intercept=True,
        normalize=False,
        max_iter=1000,
        selection=algorithm,
        tol=1e-10,
        client=client,
    )
    model.fit(data, target)
    predicted = model.predict(data, delayed=delayed)

    # based on differences with scikit-learn 0.22
    min_score = 0.96 if alpha == 0.2 else 0.80
    assert r2_score(target.compute(), predicted.compute()) >= min_score
def test_lasso(dtype, alpha, algorithm, nrows, column_info, n_parts, delayed,
               cluster):
    """Fit ``Lasso`` with explicit hyperparameters and require R^2 >= 0.99.

    A client is opened on ``cluster`` for the duration of the test and is
    closed in all cases. ``column_info`` unpacks to
    ``(feature count, informative count)``.
    """
    client = Client(cluster)
    n_features, n_informative = column_info

    try:
        data, target = make_regression(
            n_samples=nrows,
            n_features=n_features,
            n_informative=n_informative,
            n_parts=n_parts,
            client=client,
            dtype=dtype,
        )
        wait(data)

        model = Lasso(
            alpha=np.array([alpha]),
            fit_intercept=True,
            normalize=False,
            max_iter=1000,
            selection=algorithm,
            tol=1e-10,
            client=client,
        )
        model.fit(data, target)
        predicted = model.predict(data, delayed=delayed)

        score = r2_score(target.compute(), predicted.compute())
        assert score >= 0.99
    finally:
        client.close()
def test_elastic_net_default(dtype, nrows, column_info, n_parts, cluster):
    """Fit a default-configured ``ElasticNet`` and require R^2 >= 0.96.

    A client is opened on ``cluster`` for the duration of the test and is
    closed in all cases. ``column_info`` unpacks to
    ``(feature count, informative count)``.
    """
    client = Client(cluster)
    n_features, n_informative = column_info

    try:
        data, target = make_regression(
            n_samples=nrows,
            n_features=n_features,
            n_informative=n_informative,
            n_parts=n_parts,
            client=client,
            dtype=dtype,
        )
        wait(data)

        model = ElasticNet(client=client)
        model.fit(data, target)
        predicted = model.predict(data)

        score = r2_score(target.compute(), predicted.compute())
        assert score >= 0.96
    finally:
        client.close()
def test_make_regression(n_samples, n_features, n_informative, n_targets,
                         bias, effective_rank, tail_strength, noise, shuffle,
                         coef, random_state, n_parts, cluster):
    """Check shapes, chunking and consistency of ``make_regression`` output.

    The arguments are presumably supplied by pytest fixtures and
    parametrization defined outside this block. All generator arguments,
    including ``tail_strength``, are forwarded to ``make_regression`` so
    that every parametrized value is actually exercised.

    When ``coef`` is truthy the generator also returns the coefficients,
    which are checked for shape, for the number of non-zero entries per
    target, and for agreement with the generated targets.

    NOTE(review): this source contains a second ``test_make_regression``
    definition; if both live in the same module the later one shadows this
    one - confirm they belong to different files.
    """
    c = Client(cluster)
    try:
        from cuml.dask.datasets import make_regression

        result = make_regression(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_targets=n_targets,
            bias=bias,
            effective_rank=effective_rank,
            tail_strength=tail_strength,
            noise=noise,
            shuffle=shuffle,
            coef=coef,
            random_state=random_state,
            n_parts=n_parts,
        )

        # The coefficients are only part of the result when requested.
        if coef:
            out, values, coefs = result
        else:
            out, values = result

        assert out.shape == (n_samples, n_features), "out shape mismatch"

        if n_targets > 1:
            assert values.shape == (n_samples, n_targets), \
                "values shape mismatch"
        else:
            assert values.shape == (n_samples, ), "values shape mismatch"

        # Rows are split into ``n_parts`` chunks; columns stay in one chunk.
        assert len(out.chunks[0]) == n_parts
        assert len(out.chunks[1]) == 1

        if coef:
            if n_targets > 1:
                assert coefs.shape == (n_features, n_targets), \
                    "coefs shape mismatch"
                assert len(coefs.chunks[1]) == 1
            else:
                assert coefs.shape == (n_features, ), "coefs shape mismatch"
                assert len(coefs.chunks[0]) == 1

            # Each target must use exactly ``n_informative`` non-zero weights.
            test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)

            # Spread of the residual between targets and the linear model.
            std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

            test1, std_test2 = da.compute(test1, std_test2)

            # The residual standard deviation must be within ~0.15 of 1.0.
            diff = cp.abs(1.0 - std_test2)
            test2 = cp.all(diff < 1.5 * 10**(-1.))

            assert test1, \
                "Unexpected number of informative features"

            assert test2, "Unexpectedly incongruent outputs"
    finally:
        # Always release the client, even when an assertion fails.
        c.close()
def make_dataset(datatype, nrows, ncols, n_info):
    """Build a regression dataset and split it 80/20 into train and test.

    The data is generated with ``random_state=0`` and cast to ``datatype``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test)``; the held-out targets are not
        returned.
    """
    features, target = make_regression(
        n_samples=nrows,
        n_features=ncols,
        n_informative=n_info,
        random_state=0,
    )
    features, target = features.astype(datatype), target.astype(datatype)

    # The fourth item (held-out targets) is not part of the return value.
    train_X, test_X, train_y, _ = train_test_split(
        features, target, train_size=0.8
    )
    return train_X, train_y, test_X
def test_make_regression(n_samples, n_features, n_informative, n_targets,
                         bias, effective_rank, tail_strength, noise, shuffle,
                         coef, n_parts, order, use_full_low_rank, client):
    """Check shapes, chunking, consistency and memory order of the output.

    The arguments are presumably supplied by pytest fixtures and
    parametrization defined outside this block. All generator arguments,
    including ``tail_strength``, are forwarded to ``make_regression`` so
    that every parametrized value is actually exercised.

    When ``coef`` is truthy the generator also returns the coefficients,
    which are checked for shape, for the number of non-zero entries per
    target, and for agreement with the generated targets. Finally the first
    partition of each output is fetched and its contiguity flags are
    compared against ``order`` (``'F'`` or ``'C'``).
    """
    c = client
    from cuml.dask.datasets import make_regression

    result = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=n_targets,
        bias=bias,
        effective_rank=effective_rank,
        tail_strength=tail_strength,
        noise=noise,
        shuffle=shuffle,
        coef=coef,
        n_parts=n_parts,
        use_full_low_rank=use_full_low_rank,
        order=order,
    )

    # The coefficients are only part of the result when requested.
    if coef:
        out, values, coefs = result
    else:
        out, values = result

    assert out.shape == (n_samples, n_features), "out shape mismatch"

    if n_targets > 1:
        assert values.shape == (n_samples, n_targets), \
            "values shape mismatch"
    else:
        assert values.shape == (n_samples,), "values shape mismatch"

    # Rows are split into ``n_parts`` chunks; columns stay in one chunk.
    assert len(out.chunks[0]) == n_parts
    assert len(out.chunks[1]) == 1

    if coef:
        if n_targets > 1:
            assert coefs.shape == (n_features, n_targets), \
                "coefs shape mismatch"
            assert len(coefs.chunks[1]) == 1
        else:
            assert coefs.shape == (n_features,), "coefs shape mismatch"
            assert len(coefs.chunks[0]) == 1

        # Each target must use exactly ``n_informative`` non-zero weights.
        test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)

        # Spread of the residual between targets and the linear model.
        std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

        test1, std_test2 = da.compute(test1, std_test2)

        # The residual standard deviation must be within ~0.15 of 1.0.
        diff = cp.abs(1.0 - std_test2)
        test2 = cp.all(diff < 1.5 * 10**(-1.))

        assert test1, \
            "Unexpected number of informative features"

        assert test2, "Unexpectedly incongruent outputs"

    # Pull the first partition of each output to inspect its memory layout.
    data_ddh = DistributedDataHandler.create(data=(out, values), client=c)
    out_part, value_part = data_ddh.gpu_futures[0][1].result()

    if coef:
        coefs_ddh = DistributedDataHandler.create(data=coefs, client=c)
        coefs_part = coefs_ddh.gpu_futures[0][1].result()

    # The targets' layout is only checked when there is more than one target.
    if order == 'F':
        assert out_part.flags['F_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['F_CONTIGUOUS']
        if coef:
            assert coefs_part.flags['F_CONTIGUOUS']
    elif order == 'C':
        assert out_part.flags['C_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['C_CONTIGUOUS']
        if coef:
            assert coefs_part.flags['C_CONTIGUOUS']