def test_real_algos_runner(algo_name):
    """Run a small accuracy comparison for ``algo_name`` and check cuML scored.

    The algorithm pair is looked up by name, then run through an
    ``AccuracyComparisonRunner`` configured with a single (20 x 5) problem on
    the 'classification' dataset with a 20% test fraction. The test passes when
    the first result row carries a non-None ``"cuml_acc"`` entry.

    The test is marked as an expected failure when an optional dependency
    needed by the requested algorithm is not installed.

    Parameters
    ----------
    algo_name : str
        Name of a registered algorithm pair.
    """
    pair = algorithms.algorithm_by_name(algo_name)

    # UMAP pairs are registered under suffixed names ("UMAP-Unsupervised",
    # "UMAP-Supervised"), so an exact comparison against 'UMAP' would never
    # match them; a prefix test covers both those and the bare name.
    missing_umap = algo_name.startswith('UMAP') and not has_umap()
    missing_xgboost = algo_name == 'FIL' and not has_xgboost()
    if missing_umap or missing_xgboost:
        pytest.xfail(
            "optional dependency for '{}' is not installed".format(algo_name)
        )

    runner = AccuracyComparisonRunner(
        [20], [5], dataset_name='classification', test_fraction=0.20
    )
    results = runner.run(pair)[0]
    print(results)

    assert results["cuml_acc"] is not None
def all_algorithms():
    """Build and return the list of every defined AlgorithmPair.

    Returns
    -------
    list of AlgorithmPair
        One entry per benchmarkable algorithm, in registration order: the
        individually configured estimators first, followed by the
        preprocessing transformers.
    """
    estimator_pairs = [
        # --- Clustering, decomposition and projection -----------------------
        AlgorithmPair(
            sklearn.cluster.KMeans,
            cuml.cluster.KMeans,
            shared_args={
                "init": "k-means++",
                "n_clusters": 8,
                "max_iter": 300,
                "n_init": 1,
            },
            cuml_args={"oversampling_factor": 0},
            name="KMeans",
            accepts_labels=False,
            accuracy_function=metrics.homogeneity_score,
        ),
        AlgorithmPair(
            sklearn.decomposition.PCA,
            cuml.PCA,
            shared_args={"n_components": 10},
            name="PCA",
            accepts_labels=False,
        ),
        AlgorithmPair(
            sklearn.decomposition.TruncatedSVD,
            cuml.decomposition.tsvd.TruncatedSVD,
            shared_args={"n_components": 10},
            name="tSVD",
            accepts_labels=False,
        ),
        AlgorithmPair(
            sklearn.random_projection.GaussianRandomProjection,
            cuml.random_projection.GaussianRandomProjection,
            shared_args={"n_components": 10},
            name="GaussianRandomProjection",
            bench_func=fit_transform,
            accepts_labels=False,
        ),
        AlgorithmPair(
            sklearn.random_projection.SparseRandomProjection,
            cuml.random_projection.SparseRandomProjection,
            shared_args={"n_components": 10},
            name="SparseRandomProjection",
            bench_func=fit_transform,
            accepts_labels=False,
        ),
        AlgorithmPair(
            sklearn.neighbors.NearestNeighbors,
            cuml.neighbors.NearestNeighbors,
            shared_args={"n_neighbors": 1024},
            cpu_args={"algorithm": "brute", "n_jobs": -1},
            cuml_args={},
            name="NearestNeighbors",
            accepts_labels=False,
            bench_func=fit_kneighbors,
        ),
        AlgorithmPair(
            sklearn.cluster.DBSCAN,
            cuml.DBSCAN,
            shared_args={"eps": 3, "min_samples": 2},
            cpu_args={"algorithm": "brute"},
            name="DBSCAN",
            accepts_labels=False,
        ),
        # --- Linear models --------------------------------------------------
        AlgorithmPair(
            sklearn.linear_model.LinearRegression,
            cuml.linear_model.LinearRegression,
            shared_args={},
            name="LinearRegression",
            accepts_labels=True,
            accuracy_function=metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.linear_model.ElasticNet,
            cuml.linear_model.ElasticNet,
            shared_args={"alpha": 0.1, "l1_ratio": 0.5},
            name="ElasticNet",
            accepts_labels=True,
            accuracy_function=metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.linear_model.Lasso,
            cuml.linear_model.Lasso,
            shared_args={},
            name="Lasso",
            accepts_labels=True,
            accuracy_function=metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.linear_model.Ridge,
            cuml.linear_model.Ridge,
            shared_args={},
            name="Ridge",
            accepts_labels=True,
            accuracy_function=metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.linear_model.LogisticRegression,
            cuml.linear_model.LogisticRegression,
            shared_args={},  # Use default solvers
            name="LogisticRegression",
            accepts_labels=True,
            accuracy_function=metrics.accuracy_score,
        ),
        # --- Tree ensembles -------------------------------------------------
        AlgorithmPair(
            sklearn.ensemble.RandomForestClassifier,
            cuml.ensemble.RandomForestClassifier,
            shared_args={"max_features": 1.0, "n_estimators": 10},
            name="RandomForestClassifier",
            accepts_labels=True,
            cpu_data_prep_hook=_labels_to_int_hook,
            cuml_data_prep_hook=_labels_to_int_hook,
            accuracy_function=metrics.accuracy_score,
        ),
        AlgorithmPair(
            sklearn.ensemble.RandomForestRegressor,
            cuml.ensemble.RandomForestRegressor,
            shared_args={"max_features": 1.0, "n_estimators": 10},
            name="RandomForestRegressor",
            accepts_labels=True,
            accuracy_function=metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.manifold.TSNE,
            cuml.manifold.TSNE,
            shared_args={},
            name="TSNE",
            accepts_labels=False,
        ),
        # No CPU class is supplied for this pair (cpu class is None).
        AlgorithmPair(
            None,
            cuml.linear_model.MBSGDClassifier,
            shared_args={},
            cuml_args={"eta0": 0.005, "epochs": 100},
            name="MBSGDClassifier",
            accepts_labels=True,
            accuracy_function=cuml.metrics.accuracy_score,
        ),
        # --- Support vector machines ----------------------------------------
        AlgorithmPair(
            sklearn.svm.SVC,
            cuml.svm.SVC,
            shared_args={"kernel": "rbf"},
            cuml_args={},
            name="SVC-RBF",
            accepts_labels=True,
            accuracy_function=cuml.metrics.accuracy_score,
        ),
        AlgorithmPair(
            sklearn.svm.SVC,
            cuml.svm.SVC,
            shared_args={"kernel": "linear"},
            cuml_args={},
            name="SVC-Linear",
            accepts_labels=True,
            accuracy_function=cuml.metrics.accuracy_score,
        ),
        AlgorithmPair(
            sklearn.svm.SVR,
            cuml.svm.SVR,
            shared_args={"kernel": "rbf"},
            cuml_args={},
            name="SVR-RBF",
            accepts_labels=True,
            accuracy_function=cuml.metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.svm.SVR,
            cuml.svm.SVR,
            shared_args={"kernel": "linear"},
            cuml_args={},
            name="SVR-Linear",
            accepts_labels=True,
            accuracy_function=cuml.metrics.r2_score,
        ),
        # --- Neighbors and naive Bayes --------------------------------------
        AlgorithmPair(
            sklearn.neighbors.KNeighborsClassifier,
            cuml.neighbors.KNeighborsClassifier,
            shared_args={},
            cuml_args={},
            name="KNeighborsClassifier",
            accepts_labels=True,
            accuracy_function=cuml.metrics.accuracy_score,
        ),
        AlgorithmPair(
            sklearn.neighbors.KNeighborsRegressor,
            cuml.neighbors.KNeighborsRegressor,
            shared_args={},
            cuml_args={},
            name="KNeighborsRegressor",
            accepts_labels=True,
            accuracy_function=cuml.metrics.r2_score,
        ),
        AlgorithmPair(
            sklearn.naive_bayes.MultinomialNB,
            cuml.naive_bayes.MultinomialNB,
            shared_args={},
            cuml_args={},
            name="MultinomialNB",
            accepts_labels=True,
            accuracy_function=cuml.metrics.accuracy_score,
        ),
        # --- Forest inference (custom setup functions, `predict` bench) -----
        AlgorithmPair(
            treelite,
            cuml.ForestInference,
            shared_args={"num_rounds": 100, "max_depth": 10},
            cuml_args={
                "fil_algo": "AUTO",
                "output_class": False,
                "threshold": 0.5,
                "storage_type": "auto",
            },
            name="FIL",
            accepts_labels=False,
            setup_cpu_func=_build_treelite_classifier,
            setup_cuml_func=_build_fil_classifier,
            cpu_data_prep_hook=_treelite_format_hook,
            accuracy_function=_treelite_fil_accuracy_score,
            bench_func=predict,
        ),
        AlgorithmPair(
            treelite,
            cuml.ForestInference,
            shared_args={"n_estimators": 100, "max_leaf_nodes": 2 ** 10},
            cuml_args={
                "fil_algo": "AUTO",
                "output_class": False,
                "threshold": 0.5,
                "storage_type": "SPARSE",
            },
            name="Sparse-FIL-SKL",
            accepts_labels=False,
            setup_cpu_func=_build_cpu_skl_classifier,
            setup_cuml_func=_build_fil_skl_classifier,
            accuracy_function=_treelite_fil_accuracy_score,
            bench_func=predict,
        ),
        # --- UMAP (CPU class only when the umap package is available) -------
        AlgorithmPair(
            umap.UMAP if has_umap() else None,
            cuml.manifold.UMAP,
            shared_args={"n_neighbors": 5, "n_epochs": 500},
            name="UMAP-Unsupervised",
            accepts_labels=True,
            accuracy_function=cuml.metrics.trustworthiness,
        ),
        AlgorithmPair(
            umap.UMAP if has_umap() else None,
            cuml.manifold.UMAP,
            shared_args={"n_neighbors": 5, "n_epochs": 500},
            name="UMAP-Supervised",
            accepts_labels=True,
            accuracy_function=cuml.metrics.trustworthiness,
        ),
    ]

    # Preprocessing transformers all share one configuration (no extra
    # arguments, no labels, benchmarked through `fit_transform`), so they are
    # described as (cpu class, cuML class, registered name) triples. Each
    # estimator appears twice: once under its plain name and once under a
    # "SparseCSR"/"SparseCSC"-prefixed name.
    preprocessing_specs = (
        (sklearn.preprocessing.StandardScaler, StandardScaler,
         "StandardScaler"),
        (sklearn.preprocessing.MinMaxScaler, MinMaxScaler,
         "MinMaxScaler"),
        (sklearn.preprocessing.MaxAbsScaler, MaxAbsScaler,
         "MaxAbsScaler"),
        (sklearn.preprocessing.Normalizer, Normalizer,
         "Normalizer"),
        (skSimpleImputer, SimpleImputer,
         "SimpleImputer"),
        (sklearn.preprocessing.RobustScaler, RobustScaler,
         "RobustScaler"),
        (sklearn.preprocessing.PolynomialFeatures, PolynomialFeatures,
         "PolynomialFeatures"),
        (sklearn.preprocessing.StandardScaler, StandardScaler,
         "SparseCSRStandardScaler"),
        (sklearn.preprocessing.MinMaxScaler, MinMaxScaler,
         "SparseCSRMinMaxScaler"),
        (sklearn.preprocessing.MaxAbsScaler, MaxAbsScaler,
         "SparseCSRMaxAbsScaler"),
        (sklearn.preprocessing.Normalizer, Normalizer,
         "SparseCSRNormalizer"),
        (sklearn.preprocessing.RobustScaler, RobustScaler,
         "SparseCSCRobustScaler"),
        (skSimpleImputer, SimpleImputer,
         "SparseCSCSimpleImputer"),
        (sklearn.preprocessing.PolynomialFeatures, PolynomialFeatures,
         "SparseCSRPolynomialFeatures"),
    )
    preprocessing_pairs = [
        AlgorithmPair(
            cpu_cls,
            cuml_cls,
            shared_args={},  # fresh dict for every pair
            name=pair_name,
            accepts_labels=False,
            bench_func=fit_transform,
        )
        for cpu_cls, cuml_cls, pair_name in preprocessing_specs
    ]

    return estimator_pairs + preprocessing_pairs
from cuml.benchmark.bench_helper_funcs import ( fit, fit_kneighbors, fit_transform, predict, _build_cpu_skl_classifier, _build_fil_skl_classifier, _build_fil_classifier, _build_treelite_classifier, _treelite_fil_accuracy_score, ) import treelite import treelite_runtime if has_umap(): import umap class AlgorithmPair: """ Wraps a cuML algorithm and (optionally) a cpu-based algorithm (typically scikit-learn, but does not need to be as long as it offers `fit` and `predict` or `transform` methods). Provides mechanisms to run each version with default arguments. If no CPU-based version of the algorithm is available, pass None for the cpu_class when instantiating Parameters ---------- cpu_class : class