def test_output_type_context_mgr(global_output_type, context_type):
    """Check that ``cuml.using_output_type`` only applies inside its block.

    The global output type is first set to ``'cupy'`` (or ``'numpy'`` when
    the ``global_output_type`` argument is already ``'cupy'``).  A DBSCAN fit
    inside the context manager must return ``labels_`` in ``context_type``;
    a second fit after the block must return them in the global type again.
    """
    dataset = get_small_dataset('numba')

    # Choose a global type that differs from the incoming fixture value.
    test_type = 'numpy' if global_output_type == 'cupy' else 'cupy'
    cuml.set_global_output_type(test_type)

    # Inside the context manager the context type takes precedence.
    with cuml.using_output_type(context_type):
        scoped_model = cuml.DBSCAN(eps=1.0, min_samples=1)
        scoped_model.fit(dataset)
        scoped_labels = scoped_model.labels_

        if context_type != 'numba':
            assert isinstance(scoped_labels, test_output_types[context_type])
        else:
            # 'numba' results are checked through is_cuda_array instead of
            # an isinstance lookup.
            assert is_cuda_array(scoped_labels)

    # Outside the context manager the global type must be in effect again.
    global_model = cuml.DBSCAN(eps=1.0, min_samples=1)
    global_model.fit(dataset)
    global_labels = global_model.labels_

    assert isinstance(global_labels, test_output_types[test_type])
def global_output_type(request):
    """Yield ``request.param`` and reset cuML's global output type afterwards.

    Presumably registered as a parametrized pytest fixture (the decorator is
    not visible in this chunk -- confirm).  Once the generator is resumed
    after the yield, the global output type is set back to ``None``.
    """
    yield request.param

    # Ensure we reset the type at the end of the test
    cuml.set_global_output_type(None)
def check_correct_type(index):
    """Set a global output type and report whether it survives a delay.

    The type is looked up as ``test_output_types_str[index]``, installed with
    ``set_global_output_type``, and after half a second compared against
    ``cuml.global_settings.output_type``.

    Presumably invoked concurrently with different indices so that callers
    can overwrite each other's setting -- confirm against the caller.

    Returns the result of comparing the current setting with the type this
    call installed.
    """
    expected = test_output_types_str[index]

    # Force a race condition: the first index sets its type slightly later
    # than the others.
    if index == 0:
        sleep(0.1)

    set_global_output_type(expected)

    # Leave time for other callers to change the setting before checking.
    sleep(0.5)

    observed = cuml.global_settings.output_type
    return observed == expected
def test_dask_sql_sg_logistic_regression(
    datatype, nrows, ncols, n_parts, wrap_predict
):
    """Train cuML logistic regression through dask-sql and check accuracy.

    A classification dataset is generated and split, the training half is
    registered as a dask-cudf table, and a model is created with a
    ``CREATE MODEL`` statement.  Predictions obtained through ``PREDICT`` on
    the test half must score no more than 0.022 below a reference
    ``LogisticRegression`` fitted directly on the same training data
    (presumably scikit-learn's, given the ``skmodel`` name -- confirm against
    the module imports).

    Parameters
    ----------
    datatype
        dtype passed to ``cudf.DataFrame`` for the feature columns.
    nrows, ncols
        Sample and feature counts passed to ``make_classification``.
    n_parts
        Number of partitions for the dask-cudf tables.
    wrap_predict
        Interpolated into the ``CREATE MODEL`` statement; also selects the
        cuML output type (``"input"`` when truthy, ``"cudf"`` otherwise).
    """
    output_type = "input" if wrap_predict else "cudf"
    column_names = [chr(i) for i in range(ncols)]

    # Scope the output type to this test.  Calling set_global_output_type
    # here without restoring it would leave the setting in place for
    # whatever runs next; the context manager restores the previous value
    # on exit, including when an assertion fails.
    with cuml.using_output_type(output_type):
        X, y = make_classification(
            n_samples=nrows,
            n_features=ncols,
            n_informative=5,
            random_state=0,
        )
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        train_df = cudf.DataFrame(
            X_train, dtype=datatype, columns=column_names
        )
        train_df["target"] = y_train
        train_ddf = dask_cudf.from_cudf(train_df, npartitions=n_parts)

        c = Context()
        c.create_table("train_df", train_ddf)

        train_query = (
            " CREATE MODEL model WITH ("
            " model_class = 'cuml.linear_model.LogisticRegression',"
            f" wrap_predict = {wrap_predict},"
            " target_column = 'target'"
            " ) AS ( SELECT * FROM train_df ) "
        )
        c.sql(train_query)

        # Reference model fitted directly on the same training split.
        skmodel = LogisticRegression().fit(X_train, y_train)

        test_df = cudf.DataFrame(
            X_test, dtype=datatype, columns=column_names
        )
        test_ddf = dask_cudf.from_cudf(test_df, npartitions=n_parts)
        c.create_table("test_df", test_ddf)

        inference_query = (
            " SELECT * FROM PREDICT("
            " MODEL model,"
            " SELECT * FROM test_df"
            " ) "
        )
        preds = c.sql(inference_query).compute()

        score = cuml.metrics.accuracy_score(
            y_test, preds["target"].to_numpy()
        )
        # Allow a small tolerance relative to the reference model.
        assert score >= skmodel.score(X_test, y_test) - 0.022
def test_global_output_type(global_output_type, input_type):
    """Check that DBSCAN ``labels_`` follow the global output type.

    The dataset is built from ``input_type``, the global output type is set
    to ``global_output_type``, and the labels of a fitted DBSCAN model must
    come back in that type.
    """
    dataset = get_small_dataset(input_type)
    cuml.set_global_output_type(global_output_type)

    model = cuml.DBSCAN(eps=1.0, min_samples=1)
    model.fit(dataset)
    labels = model.labels_

    if global_output_type != 'numba':
        assert isinstance(labels, test_output_types[global_output_type])
        return

    # 'numba' results are checked through is_cuda_array instead of an
    # isinstance lookup.
    assert is_cuda_array(labels)
def __init__(self, operation_type: str, params: Optional[dict] = None):
    """Initialise the operation and switch cuML output to NumPy.

    Parameters
    ----------
    operation_type
        Forwarded to the parent initialiser, resolved to an implementation
        via ``self._convert_to_operation`` and stored as ``operation_id``.
    params
        Optional parameters, forwarded unchanged to the parent initialiser.

    Side effect: sets cuML's global output type to ``'numpy'``.
    """
    super().__init__(operation_type, params)

    implementation = self._convert_to_operation(operation_type)
    self.operation_impl = implementation
    self.operation_id = operation_type

    # Every instantiation changes the cuML global output setting.
    cuml.set_global_output_type('numpy')