Пример #1
0
def test_autosklearn_classification_methods_returns_self(dask_client):
    """
    Check that fit(), fit_ensemble() and refit() of AutoSklearnClassifier
    each return the very same estimator instance they were called on.
    """
    # Only the training split of the iris dataset is used by this test.
    X_train, y_train, _, _ = putil.get_dataset('iris')

    # The estimator is created with ensemble_size=0; the ensemble is
    # requested explicitly through fit_ensemble() further below.
    settings = dict(
        time_left_for_this_task=60,
        per_run_time_limit=10,
        ensemble_size=0,
        dask_client=dask_client,
        exclude_preprocessors=['fast_ica'],
    )
    classifier = AutoSklearnClassifier(**settings)

    after_fit = classifier.fit(X_train, y_train)
    assert classifier is after_fit

    after_ensemble = classifier.fit_ensemble(y_train, ensemble_size=5)
    assert classifier is after_ensemble

    # refit() is handed copies of the training data, not the originals.
    after_refit = classifier.refit(X_train.copy(), y_train.copy())
    assert classifier is after_refit
def test_autosklearn_anneal(as_frame):
    """
    Fit and score AutoSklearnClassifier on the anneal dataset (OpenML id 2).

    The dataset mixes NaN values with categorical and numerical columns,
    which makes it a demanding input for a unit test. ``as_frame`` selects
    whether the data is loaded as a pandas object or as a plain array.
    """
    X, y = sklearn.datasets.fetch_openml(data_id=2,
                                         return_X_y=True,
                                         as_frame=as_frame)

    settings = dict(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        initial_configurations_via_metalearning=0,
        smac_scenario_args={'runcount_limit': 6},
        resampling_strategy='holdout-iterative-fit',
    )
    classifier = AutoSklearnClassifier(**settings)

    # With a dataframe, autosklearn works out the feature types itself, so
    # no extra arguments are passed. Otherwise the types are derived from a
    # dataframe copy of the same dataset and handed over explicitly.
    fit_kwargs = {}
    if not as_frame:
        frame, _ = sklearn.datasets.fetch_openml(data_id=2,
                                                 return_X_y=True,
                                                 as_frame=True)
        column_kinds = []
        for column in frame.columns:
            if frame[column].dtype.name == 'category':
                column_kinds.append('categorical')
            else:
                column_kinds.append('numerical')
        fit_kwargs['feat_type'] = column_kinds

    fitted = classifier.fit(X, y, **fit_kwargs)
    assert classifier is fitted

    with_ensemble = classifier.fit_ensemble(y, ensemble_size=5)
    assert classifier is with_ensemble

    # The score is computed on the training data on purpose: the goal is to
    # confirm that the given data format (numpy/pandas) can be learned from
    # in a meaningful way, not to measure generalization.
    train_score = fitted.score(X, y)
    assert train_score > 0.75