def test_regression_pandas_support(self): X, y = sklearn.datasets.fetch_openml( data_id=41514, # diabetes return_X_y=True, as_frame=True, ) # This test only make sense if input is dataframe self.assertTrue(isinstance(X, pd.DataFrame)) self.assertTrue(isinstance(y, pd.Series)) automl = AutoSklearnRegressor( time_left_for_this_task=30, per_run_time_limit=5, ) # Make sure we error out because y is not encoded automl.fit(X, y) # Make sure that at least better than random. # We use same X_train==X_test to test code quality self.assertTrue(automl.score(X, y) > 0.5) automl.refit(X, y) # Make sure that at least better than random. self.assertTrue(r2(y, automl.predict(X)) > 0.5)
def test_regression_pandas_support(tmp_dir, output_dir, dask_client): X, y = sklearn.datasets.fetch_openml( data_id=41514, # diabetes return_X_y=True, as_frame=True, ) # This test only make sense if input is dataframe assert isinstance(X, pd.DataFrame) assert isinstance(y, pd.Series) automl = AutoSklearnRegressor( time_left_for_this_task=40, per_run_time_limit=5, dask_client=dask_client, tmp_folder=tmp_dir, output_folder=output_dir, ) # Make sure we error out because y is not encoded automl.fit(X, y) # Make sure that at least better than random. # We use same X_train==X_test to test code quality assert automl.score(X, y) >= 0.5, print_debug_information(automl) automl.refit(X, y) # Make sure that at least better than random. assert r2(y, automl.predict(X)) > 0.5, print_debug_information(automl) assert count_succeses( automl.cv_results_) > 0, print_debug_information(automl)