def test_stacked_ensemble_init_with_invalid_estimators_parameter():
    """Check that building the ensemble without input pipelines raises.

    Two cases are exercised: no ``input_pipelines`` argument at all, and an
    explicitly empty list. Both must raise ``EnsembleMissingPipelinesError``.
    """
    expected_message = 'must not be None or an empty list.'

    # Case 1: argument omitted entirely.
    with pytest.raises(EnsembleMissingPipelinesError, match=expected_message):
        StackedEnsembleClassifier()

    # Case 2: argument supplied but empty.
    with pytest.raises(EnsembleMissingPipelinesError, match=expected_message):
        StackedEnsembleClassifier(input_pipelines=[])
def test_stacked_feature_importance(mock_fit, X_y_binary, X_y_multi,
                                    stackable_classifiers, problem_type):
    """Check that ``feature_importance`` raises ``NotImplementedError``.

    The ensemble is built from one single-component pipeline per stackable
    classifier, fitted on the dataset matching ``problem_type``, and then the
    ``feature_importance`` property is accessed.
    """
    # Pick the dataset fixture that matches the problem type under test.
    if problem_type == ProblemTypes.BINARY:
        features, target = X_y_binary
    elif problem_type == ProblemTypes.MULTICLASS:
        features, target = X_y_multi

    base_pipelines = []
    for classifier in stackable_classifiers:
        base_pipelines.append(
            make_pipeline_from_components([classifier], problem_type))

    ensemble = StackedEnsembleClassifier(input_pipelines=base_pipelines,
                                         n_jobs=1)
    ensemble.fit(features, target)
    mock_fit.assert_called()

    # Flag the ensemble as fitted by hand before touching the property.
    # NOTE(review): mock_fit presumably replaces the real fit via a patch
    # decorator that is not visible here -- confirm.
    ensemble._is_fitted = True
    with pytest.raises(NotImplementedError,
                       match="feature_importance is not implemented"):
        ensemble.feature_importance
def test_stacked_ensemble_n_jobs_negative_one(
        X_y_binary, logistic_regression_binary_pipeline_class):
    """Check that the ensemble accepts ``n_jobs=-1``.

    Verifies that the value is reported in ``parameters`` and that fit and
    predict still produce one prediction per row, not all NaN.
    """
    features, target = X_y_binary
    input_pipelines = [
        logistic_regression_binary_pipeline_class(parameters={}),
    ]
    ensemble = StackedEnsembleClassifier(input_pipelines=input_pipelines,
                                         n_jobs=-1)

    assert ensemble.parameters == {
        "input_pipelines": input_pipelines,
        "final_estimator": None,
        'cv': None,
        'n_jobs': -1,
    }

    ensemble.fit(features, target)
    predictions = ensemble.predict(features)

    assert len(predictions) == len(target)
    # At least one prediction must be a real (non-NaN) value.
    assert not np.isnan(predictions.to_series()).all()
def test_stacked_ensemble_does_not_overwrite_pipeline_random_state(
        mock_stack, logistic_regression_binary_pipeline_class):
    """Check that each input pipeline keeps its own ``random_state``.

    The ensemble is created with ``random_state=5`` while the two input
    pipelines use 3 and 4. The estimators passed to ``mock_stack`` are then
    inspected to confirm the pipeline seeds were left untouched.
    """
    first_pipeline = logistic_regression_binary_pipeline_class(
        parameters={}, random_state=3)
    second_pipeline = logistic_regression_binary_pipeline_class(
        parameters={}, random_state=4)

    ensemble = StackedEnsembleClassifier(
        input_pipelines=[first_pipeline, second_pipeline],
        random_state=5,
        n_jobs=1)

    # call_args[1] holds the keyword arguments of the most recent call.
    passed_estimators = mock_stack.call_args[1]['estimators']

    assert ensemble.random_state == 5
    # Each entry is indexed as [position][1] to reach the wrapped estimator.
    assert passed_estimators[0][1].pipeline.random_state == 3
    assert passed_estimators[1][1].pipeline.random_state == 4
# NOTE(review): a function with this exact name is defined again later in this
# file. If both live in the same module, the later definition replaces this
# one and this version is never collected -- confirm which should remain.
def test_stacked_different_input_pipelines_classification():
    """Check that mixing multiclass and binary input pipelines raises."""
    multiclass_pipeline = make_pipeline_from_components(
        [RandomForestClassifier()], ProblemTypes.MULTICLASS)
    binary_pipeline = make_pipeline_from_components(
        [RandomForestClassifier()], ProblemTypes.BINARY)
    mixed_pipelines = [multiclass_pipeline, binary_pipeline]

    with pytest.raises(ValueError,
                       match="All pipelines must have the same problem type."):
        StackedEnsembleClassifier(input_pipelines=mixed_pipelines)
# NOTE(review): a function with this exact name is defined again later in this
# file. If both live in the same module, the later definition replaces this
# one and this version is never collected -- confirm which should remain.
def test_stacked_ensemble_nonstackable_model_families():
    """Check that a baseline-classifier pipeline is rejected as a base pipeline."""
    expected_message = "Pipelines with any of the following model families cannot be used as base pipelines"

    with pytest.raises(ValueError, match=expected_message):
        # The pipeline is built inside the context manager, as in the
        # original, so construction errors are also subject to the check.
        baseline_pipeline = make_pipeline_from_components(
            [BaselineClassifier()], ProblemTypes.BINARY)
        StackedEnsembleClassifier(input_pipelines=[baseline_pipeline])
def test_stacked_ensemble_init_with_multiple_same_estimators(
        X_y_binary, logistic_regression_binary_pipeline_class):
    """Check that several input pipelines of the same type are accepted.

    Two identical logistic-regression pipelines are stacked. The test checks
    the reported parameters, that ``fit`` returns a
    ``StackedEnsembleClassifier``, and that predictions have the right length
    and are not all NaN.
    """
    features, target = X_y_binary
    input_pipelines = [
        logistic_regression_binary_pipeline_class(parameters={})
        for _ in range(2)
    ]
    ensemble = StackedEnsembleClassifier(input_pipelines=input_pipelines,
                                         n_jobs=1)

    assert ensemble.parameters == {
        "input_pipelines": input_pipelines,
        "final_estimator": None,
        'cv': None,
        'n_jobs': 1,
    }

    fit_result = ensemble.fit(features, target)
    assert isinstance(fit_result, StackedEnsembleClassifier)

    predictions = ensemble.predict(features)
    assert len(predictions) == len(target)
    assert not np.isnan(predictions.to_series()).all()
def test_ensemble_data(mock_fit, mock_score, dummy_binary_pipeline_class,
                       stackable_classifiers):
    """Check which rows the engine fits on for regular vs. ensemble pipelines.

    A regular pipeline batch is expected to be fitted on 2/3 of the training
    indices, and a stacked-ensemble batch on 2/3 of the ensembling indices.
    The row count is read from the first positional argument of the most
    recent ``mock_fit`` call.
    """
    X = pd.DataFrame({"a": list(range(100))})
    y = pd.Series([i % 2 for i in range(100)])

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          max_batches=19,
                          ensembling=True,
                          _ensembling_split_size=0.25)
    continue_callback = MagicMock(return_value=True)
    pre_callback = MagicMock()
    post_callback = MagicMock()

    # Reproduce the training/ensembling split on row positions so the expected
    # sizes can be computed independently of the engine.
    row_positions = ww.DataTable(np.arange(X.shape[0]))
    train_split, ensemble_split, _, _ = split_data(row_positions,
                                                   y,
                                                   problem_type='binary',
                                                   test_size=0.25,
                                                   random_seed=0)
    training_indices = train_split.to_dataframe()[0].tolist()
    ensembling_indices = ensemble_split.to_dataframe()[0].tolist()

    engine = SequentialEngine(X_train=infer_feature_types(X),
                              y_train=infer_feature_types(y),
                              ensembling_indices=ensembling_indices,
                              automl=automl,
                              should_continue_callback=continue_callback,
                              pre_evaluation_callback=pre_callback,
                              post_evaluation_callback=post_callback)

    regular_batch = [dummy_binary_pipeline_class({'Mock Classifier': {'a': 1}})]
    engine.evaluate_batch(regular_batch)
    # check the fit length is correct, taking into account the data splits
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(training_indices))

    input_pipelines = []
    for classifier in stackable_classifiers:
        input_pipelines.append(
            make_pipeline_from_components([classifier], problem_type='binary'))

    ensemble_component = StackedEnsembleClassifier(input_pipelines, n_jobs=1)
    ensemble_batch = [
        make_pipeline_from_components(
            [ensemble_component],
            problem_type='binary',
            custom_name="Stacked Ensemble Classification Pipeline"),
    ]
    engine.evaluate_batch(ensemble_batch)
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(ensembling_indices))
def test_stacked_ensemble_multilevel(logistic_regression_binary_pipeline_class):
    """Check that a stacked ensemble can serve as another's final estimator.

    An inner ``StackedEnsembleClassifier`` is passed as ``final_estimator`` of
    an outer one. The outer ensemble is fitted on synthetic data, and the
    predictions must have one entry per row and not be all NaN.
    """
    # Draw the features from a locally seeded generator so the test data is
    # identical on every run and the global NumPy random state is untouched.
    rng = np.random.RandomState(0)
    X = pd.DataFrame(rng.rand(50, 5))
    y = pd.Series([1, 0] * 25)

    base = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        n_jobs=1)
    clf = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        final_estimator=base,
        n_jobs=1)

    clf.fit(X, y)
    y_pred = clf.predict(X)

    assert len(y_pred) == len(y)
    assert not np.isnan(y_pred.to_series()).all()
def test_stacked_fit_predict_classification(X_y_binary, X_y_multi,
                                            stackable_classifiers,
                                            problem_type):
    """Check fit/predict/predict_proba of the stacked ensemble classifier.

    The same checks run for two configurations: the ensemble built without a
    ``final_estimator`` argument, and the ensemble built with a
    ``RandomForestClassifier`` as final estimator.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        num_classes = 2
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        num_classes = 3

    input_pipelines = [
        make_pipeline_from_components([classifier], problem_type)
        for classifier in stackable_classifiers
    ]

    def _fit_and_check(clf):
        """Fit ``clf`` on (X, y) and validate its predictions and probabilities."""
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert len(y_pred) == len(y)
        assert isinstance(y_pred, ww.DataColumn)
        assert not np.isnan(y_pred.to_series()).all()

        y_pred_proba = clf.predict_proba(X)
        assert isinstance(y_pred_proba, ww.DataTable)
        # One probability column per class, one row per sample.
        assert y_pred_proba.shape == (len(y), num_classes)
        assert not np.isnan(y_pred_proba.to_dataframe()).all().all()

    # Configuration 1: no final_estimator argument supplied.
    _fit_and_check(
        StackedEnsembleClassifier(input_pipelines=input_pipelines, n_jobs=1))

    # Configuration 2: explicit final estimator.
    _fit_and_check(
        StackedEnsembleClassifier(input_pipelines=input_pipelines,
                                  final_estimator=RandomForestClassifier(),
                                  n_jobs=1))
def test_score_batch_works(mock_score, pipeline_score_side_effect, X_y_binary,
                           dummy_binary_pipeline_class, stackable_classifiers,
                           caplog):
    """Check ``SequentialEngine.score_batch`` with and without score errors.

    ``pipeline_score_side_effect`` holds one entry per pipeline. Entries that
    are ``PipelineScoreError`` instances are expected to give NaN for the
    failed objectives, overlaid with the scores that did succeed, plus a
    logged "Score error" message. The batch is scored before and after
    ``automl.search()``.
    """
    last_index = len(pipeline_score_side_effect) - 1
    exceptions_to_check = []
    expected_scores = {}
    for position, effect in enumerate(pipeline_score_side_effect):
        # Ensemble pipeline has different name
        if position < last_index:
            pipeline_name = f"Pipeline {position}"
        else:
            pipeline_name = "Templated Pipeline"

        if isinstance(effect, PipelineScoreError):
            # Start from NaN for every objective, then overlay the successes.
            scores = {
                "F1": np.nan,
                "AUC": np.nan,
                "Log Loss Binary": np.nan,
                **effect.scored_successfully,
            }
            exceptions_to_check.append(f"Score error for {pipeline_name}")
        else:
            scores = no_exception_scores
        expected_scores[pipeline_name] = scores

    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          max_iterations=1,
                          allowed_pipelines=[dummy_binary_pipeline_class])
    engine = SequentialEngine(X_train=automl.X_train,
                              y_train=automl.y_train,
                              automl=automl)

    def make_pipeline_name(index):
        """Build a dummy pipeline instance whose custom name is "Pipeline <index>"."""

        class DummyPipeline(dummy_binary_pipeline_class):
            custom_name = f"Pipeline {index}"

        return DummyPipeline({'Mock Classifier': {'a': index}})

    # Every side-effect entry except the last maps to a named dummy pipeline;
    # the last one is taken by the stacked ensemble appended below.
    pipelines = [make_pipeline_name(i) for i in range(last_index)]

    ensemble_input_pipelines = [
        make_pipeline_from_components([classifier], problem_type="binary")
        for classifier in stackable_classifiers[:2]
    ]
    ensemble_component = StackedEnsembleClassifier(ensemble_input_pipelines,
                                                   n_jobs=1)
    pipelines.append(
        make_pipeline_from_components([ensemble_component],
                                      problem_type="binary"))

    def score_batch_and_check():
        """Score the batch under a patched ``score`` and verify scores and logs."""
        caplog.clear()
        score_target = 'evalml.pipelines.BinaryClassificationPipeline.score'
        with patch(score_target) as mock_score:
            mock_score.side_effect = pipeline_score_side_effect

            actual_scores = engine.score_batch(
                pipelines, X, y, objectives=["Log Loss Binary", "F1", "AUC"])
            assert actual_scores == expected_scores
            for message in exceptions_to_check:
                assert message in caplog.text

    # Test scoring before search
    score_batch_and_check()

    automl.search()

    # Test scoring after search
    score_batch_and_check()
def test_train_batch_works(mock_score, pipeline_fit_side_effect, X_y_binary,
                           dummy_binary_pipeline_class, stackable_classifiers,
                           caplog):
    """Check ``SequentialEngine.train_batch`` with and without fit errors.

    ``pipeline_fit_side_effect`` holds one entry per pipeline. Entries that
    are exceptions should be logged, and those pipelines left out of the
    returned collection. The batch is trained before and after
    ``automl.search()``.
    """
    exceptions_to_check = []
    for effect in pipeline_fit_side_effect:
        if isinstance(effect, Exception):
            exceptions_to_check.append(str(effect))

    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          max_time=1,
                          max_iterations=2,
                          train_best_pipeline=False,
                          n_jobs=1)
    engine = SequentialEngine(X_train=automl.X_train,
                              y_train=automl.y_train,
                              automl=automl)

    def make_pipeline_name(index):
        """Build a dummy pipeline instance whose custom name is "Pipeline <index>"."""

        class DummyPipeline(dummy_binary_pipeline_class):
            custom_name = f"Pipeline {index}"

        return DummyPipeline({'Mock Classifier': {'a': index}})

    # Every side-effect entry except the last maps to a named dummy pipeline;
    # the last one is taken by the stacked ensemble appended below.
    dummy_count = len(pipeline_fit_side_effect) - 1
    pipelines = [make_pipeline_name(i) for i in range(dummy_count)]

    ensemble_input_pipelines = [
        make_pipeline_from_components([classifier], problem_type="binary")
        for classifier in stackable_classifiers[:2]
    ]
    ensemble_component = StackedEnsembleClassifier(ensemble_input_pipelines,
                                                   n_jobs=1)
    pipelines.append(
        make_pipeline_from_components([ensemble_component],
                                      problem_type="binary"))

    def train_batch_and_check():
        """Train the batch under a patched ``fit`` and verify results and logs."""
        caplog.clear()
        fit_target = 'evalml.pipelines.BinaryClassificationPipeline.fit'
        with patch(fit_target) as mock_fit:
            mock_fit.side_effect = pipeline_fit_side_effect

            trained_pipelines = engine.train_batch(pipelines)

            expected_trained = (len(pipeline_fit_side_effect)
                                - len(exceptions_to_check))
            assert len(trained_pipelines) == expected_trained
            assert mock_fit.call_count == len(pipeline_fit_side_effect)
            for message in exceptions_to_check:
                assert message in caplog.text

    # Test training before search is run
    train_batch_and_check()

    # Test training after search.
    automl.search()

    train_batch_and_check()
# NOTE(review): a function with this exact name is also defined earlier in
# this file. If both live in the same module, this later definition is the one
# that takes effect -- confirm the earlier one is not meant to run as well.
def test_stacked_different_input_pipelines_classification():
    """Check that mixing binary and multiclass input pipelines raises."""
    binary_pipeline = BinaryClassificationPipeline([RandomForestClassifier])
    multiclass_pipeline = MulticlassClassificationPipeline(
        [RandomForestClassifier])
    mixed_pipelines = [binary_pipeline, multiclass_pipeline]

    with pytest.raises(ValueError,
                       match="All pipelines must have the same problem type."):
        StackedEnsembleClassifier(input_pipelines=mixed_pipelines)
# NOTE(review): a function with this exact name is also defined earlier in
# this file. If both live in the same module, this later definition is the one
# that takes effect -- confirm the earlier one is not meant to run as well.
def test_stacked_ensemble_nonstackable_model_families():
    """Check that a baseline-classifier pipeline is rejected as a base pipeline."""
    expected_message = "Pipelines with any of the following model families cannot be used as base pipelines"

    with pytest.raises(ValueError, match=expected_message):
        # The pipeline is built inside the context manager, as in the
        # original, so construction errors are also subject to the check.
        baseline_pipeline = BinaryClassificationPipeline([BaselineClassifier])
        StackedEnsembleClassifier(input_pipelines=[baseline_pipeline])