def test_stacked_fit_predict_classification(X_y_binary, X_y_multi, stackable_classifiers, problem_type):
    """Fit a StackedEnsembleClassifier and check its predictions and probabilities.

    The same checks are run twice: once with the ensemble built without a
    ``final_estimator`` argument and once with an explicit
    ``RandomForestClassifier`` as the final estimator.

    Arguments:
        X_y_binary: fixture unpacked as ``(X, y)`` when ``problem_type`` is BINARY.
        X_y_multi: fixture unpacked as ``(X, y)`` when ``problem_type`` is MULTICLASS.
        stackable_classifiers: iterable of classifiers; each one is wrapped in
            its own single-component pipeline and used as an input pipeline.
        problem_type: ``ProblemTypes.BINARY`` or ``ProblemTypes.MULTICLASS``.

    Raises:
        ValueError: if ``problem_type`` is neither BINARY nor MULTICLASS.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        num_classes = 2
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        num_classes = 3
    else:
        # Fail with a clear message rather than an unbound-local error on X / y below.
        raise ValueError("Unsupported problem type for this test: {}".format(problem_type))

    def _check_fit_predict(clf):
        """Fit ``clf`` on (X, y) and assert on the type and shape of its outputs."""
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert len(y_pred) == len(y)
        assert isinstance(y_pred, ww.DataColumn)
        # NOTE(review): this only guards against predictions being *entirely* NaN.
        assert not np.isnan(y_pred.to_series()).all()

        y_pred_proba = clf.predict_proba(X)
        assert isinstance(y_pred_proba, ww.DataTable)
        # One probability column per class, one row per sample.
        assert y_pred_proba.shape == (len(y), num_classes)
        assert not np.isnan(y_pred_proba.to_dataframe()).all().all()

    input_pipelines = [
        make_pipeline_from_components([classifier], problem_type)
        for classifier in stackable_classifiers
    ]

    # Ensemble built without specifying a final estimator.
    _check_fit_predict(StackedEnsembleClassifier(input_pipelines=input_pipelines, n_jobs=1))

    # Ensemble with an explicitly supplied final estimator.
    _check_fit_predict(
        StackedEnsembleClassifier(
            input_pipelines=input_pipelines,
            final_estimator=RandomForestClassifier(),
            n_jobs=1,
        )
    )
def test_stacked_ensemble_multilevel(logistic_regression_binary_pipeline_class):
    """Check that a stacked ensemble classifier can itself be passed as a final estimator.

    Arguments:
        logistic_regression_binary_pipeline_class: fixture providing a pipeline
            class; it is instantiated with ``parameters={}`` for both the inner
            and the outer ensemble.
    """
    # Draw the features from a locally seeded generator so the test data is
    # identical on every run and independent of the global NumPy RNG state.
    rng = np.random.RandomState(0)
    X = pd.DataFrame(rng.rand(50, 5))
    y = pd.Series([1, 0] * 25)  # 50 alternating binary labels

    # Inner ensemble, used below as the final estimator of the outer one.
    base = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        n_jobs=1,
    )
    clf = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        final_estimator=base,
        n_jobs=1,
    )

    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert len(y_pred) == len(y)
    assert not np.isnan(y_pred.to_series()).all()
def test_stacked_ensemble_n_jobs_negative_one(X_y_binary, logistic_regression_binary_pipeline_class):
    """Build a StackedEnsembleClassifier with ``n_jobs=-1``, then fit and predict.

    Verifies that the constructor arguments are reported back through
    ``parameters`` and that prediction returns one value per target row.
    """
    features, target = X_y_binary
    pipelines = [logistic_regression_binary_pipeline_class(parameters={})]
    ensemble = StackedEnsembleClassifier(input_pipelines=pipelines, n_jobs=-1)

    # Unspecified arguments are expected to be reported as None.
    assert ensemble.parameters == dict(
        input_pipelines=pipelines,
        final_estimator=None,
        cv=None,
        n_jobs=-1,
    )

    ensemble.fit(features, target)
    predictions = ensemble.predict(features)
    assert len(predictions) == len(target)

    every_value_is_nan = np.isnan(predictions.to_series()).all()
    assert not every_value_is_nan
def test_stacked_ensemble_init_with_multiple_same_estimators(X_y_binary, logistic_regression_binary_pipeline_class):
    """Check that several input pipelines of the same type are accepted.

    Two separate instances of the same pipeline class are passed as input
    pipelines; the ensemble must report them in ``parameters``, return a
    StackedEnsembleClassifier from ``fit``, and predict one value per target row.
    """
    features, target = X_y_binary
    # Two distinct instances of the same pipeline class.
    pipelines = [logistic_regression_binary_pipeline_class(parameters={}) for _ in range(2)]
    ensemble = StackedEnsembleClassifier(input_pipelines=pipelines, n_jobs=1)

    assert ensemble.parameters == dict(
        input_pipelines=pipelines,
        final_estimator=None,
        cv=None,
        n_jobs=1,
    )

    assert isinstance(ensemble.fit(features, target), StackedEnsembleClassifier)

    predictions = ensemble.predict(features)
    assert len(predictions) == len(target)

    every_value_is_nan = np.isnan(predictions.to_series()).all()
    assert not every_value_is_nan