def test_autosklearn2_classification_methods_returns_self_sparse(dask_client):
    """Check that fit, fit_ensemble and refit hand back the estimator itself.

    The 'breast_cancer' dataset is loaded with ``make_sparse=True``. Beyond
    the identity checks, the test requires an accuracy of at least 2/3 on the
    test split, requires that "boosting" does not occur in the string form of
    the configuration space, and finally pickles the fitted estimator.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset(
        'breast_cancer', make_sparse=True)

    estimator_kwargs = dict(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        dask_client=dask_client,
    )
    classifier = AutoSklearn2Classifier(**estimator_kwargs)

    # Each of the three training entry points must return the same object.
    after_fit = classifier.fit(X_train, y_train)
    assert classifier is after_fit

    after_ensemble = classifier.fit_ensemble(y_train, ensemble_size=5)
    assert classifier is after_ensemble

    after_refit = classifier.refit(X_train.copy(), y_train.copy())
    assert classifier is after_refit

    y_pred = after_fit.predict(X_test)
    accuracy = sklearn.metrics.accuracy_score(y_test, y_pred)
    # The debug information is only produced when the assertion fails.
    assert accuracy >= 2 / 3, print_debug_information(classifier)

    config_space_text = str(
        classifier.get_configuration_space(X=X_train, y=y_train))
    assert "boosting" not in config_space_text

    # Serialising the fitted estimator must not raise.
    pickle.dumps(after_fit)
def test_classification_methods_returns_self(self):
    """Check that fit, fit_ensemble and refit each return the estimator itself.

    Runs on the 'iris' dataset; only the training split is used.
    """
    X_train, y_train, _, _ = putil.get_dataset('iris')

    classifier = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
    )

    after_fit = classifier.fit(X_train, y_train)
    self.assertIs(classifier, after_fit)

    after_ensemble = classifier.fit_ensemble(y_train, ensemble_size=5)
    self.assertIs(classifier, after_ensemble)

    # refit receives copies of the training data, as in the other steps' inputs.
    after_refit = classifier.refit(X_train.copy(), y_train.copy())
    self.assertIs(classifier, after_refit)
def init(df, param):
    """Build an ``AutoSklearn2Classifier`` from the options in ``param``.

    Args:
        df: Not used by this function; kept so the signature stays unchanged
            for existing callers.
        param: Mapping whose ``param['options']['params']`` entry holds the
            keyword arguments for the classifier. The entry is deep-copied,
            so the caller's mapping is never modified.

    Returns:
        A dict with a single ``"model"`` key holding the newly constructed
        classifier (``fit`` is not called here).
    """
    params = deepcopy(param['options']['params'])

    # These entries are removed before the rest is forwarded to the
    # classifier constructor as keyword arguments.
    for reserved in ('algo', 'mode', 'dataset_name'):
        params.pop(reserved, None)

    # Turn digit-only strings (e.g. "60") into ints. Assigning to an existing
    # key while iterating is safe because the dict's size does not change.
    for key, value in params.items():
        try:
            if value.isdigit():
                params[key] = int(value)
        except (AttributeError, TypeError, ValueError):
            # Values without a usable .isdigit() (ints, None, lists, ...) and
            # digit-like strings that int() rejects are passed through
            # unchanged. A bare ``except`` would also have swallowed
            # KeyboardInterrupt/SystemExit, which must propagate.
            continue

    return {"model": AutoSklearn2Classifier(**params)}
def test_autosklearn2_classification_methods_returns_self(dask_client):
    """Check that fit, fit_ensemble and refit hand back the estimator itself.

    Runs on the 'iris' dataset. After the identity checks, the test requires
    an accuracy of at least 2/3 on the test split and pickles the fitted
    estimator.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset('iris')

    classifier = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        dask_client=dask_client,
    )

    # Each of the three training entry points must return the same object.
    after_fit = classifier.fit(X_train, y_train)
    assert classifier is after_fit

    after_ensemble = classifier.fit_ensemble(y_train, ensemble_size=5)
    assert classifier is after_ensemble

    after_refit = classifier.refit(X_train.copy(), y_train.copy())
    assert classifier is after_refit

    y_pred = after_fit.predict(X_test)
    accuracy = sklearn.metrics.accuracy_score(y_test, y_pred)
    assert accuracy >= 2 / 3

    # Serialising the fitted estimator must not raise.
    pickle.dumps(after_fit)
# Integer-encode the categorical feature columns, one encoder per column.
to_encode = [
    "Gender",
    "Customer Type",
    "Type of Travel",
    "Class",
]
for col in to_encode:
    orig_data[col] = LabelEncoder().fit_transform(orig_data[col])

# Map the satisfaction labels to 1/0. The result is assigned back to the
# column: calling replace(..., inplace=True) on the selection
# orig_data["satisfaction"] is a chained in-place operation, which raises a
# FutureWarning in pandas 2.x and leaves orig_data unchanged under
# Copy-on-Write.
orig_data["satisfaction"] = orig_data["satisfaction"].replace(
    {"satisfied": 1, "neutral or dissatisfied": 0}
)

# Standardise every column except the target; the scaler is re-fitted on
# each column in turn.
scaler = StandardScaler()
for col in orig_data.columns:
    if col != target:
        orig_data[col] = scaler.fit_transform(orig_data[[col]])

# Separate the feature columns from the target column.
feature_columns = [x for x in orig_data.columns if x != target]
data_X = orig_data.loc[:, feature_columns]
data_Y = orig_data.loc[:, target]

data_X_train, data_X_test, data_y_train, data_y_test = train_test_split(
    data_X, data_Y, test_size=0.3, random_state=1)

cls = AutoSklearn2Classifier(time_left_for_this_task=60)
print("Fitting..")
cls.fit(data_X_train, data_y_train)

pred = cls.predict(data_X_test)
print("AutoML precision_score", precision_score(data_y_test, pred))
print("AutoML recall_score", recall_score(data_y_test, pred))
# NOTE(review): roc_auc_score receives the output of predict() here, not
# predict_proba() scores - confirm that this is intended.
print("AutoML roc_auc_score", roc_auc_score(data_y_test, pred))