def test_stacked_fit_predict_classification(X_y_binary, X_y_multi, stackable_classifiers, problem_type):
    """Fit a stacked ensemble classifier and sanity-check predict / predict_proba.

    The same checks run twice: once with the ensemble built without a
    ``final_estimator`` argument, and once with an explicit
    ``RandomForestClassifier()`` as the final estimator.

    Args:
        X_y_binary: Fixture unpacked as ``(X, y)`` when the problem type is binary.
        X_y_multi: Fixture unpacked as ``(X, y)`` when the problem type is multiclass.
        stackable_classifiers: Iterable of classifiers; each is wrapped in its own
            single-component input pipeline.
        problem_type: ``ProblemTypes.BINARY`` or ``ProblemTypes.MULTICLASS``.

    Raises:
        ValueError: If ``problem_type`` is neither of the two supported values.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        num_classes = 2
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        num_classes = 3
    else:
        # Fail loudly here rather than with an unrelated UnboundLocalError further down.
        raise ValueError("Unsupported problem type for this test: {}".format(problem_type))

    input_pipelines = [make_pipeline_from_components([classifier], problem_type)
                       for classifier in stackable_classifiers]

    def _assert_valid_predictions(clf):
        # Shared checks for every ensemble configuration exercised below.
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert len(y_pred) == len(y)
        assert isinstance(y_pred, ww.DataColumn)
        # NOTE(review): this only rules out an all-NaN result; switch to `.any()`
        # if no NaN prediction is ever acceptable -- confirm intent.
        assert not np.isnan(y_pred.to_series()).all()

        y_pred_proba = clf.predict_proba(X)
        assert isinstance(y_pred_proba, ww.DataTable)
        # One probability column is expected per class.
        assert y_pred_proba.shape == (len(y), num_classes)
        assert not np.isnan(y_pred_proba.to_dataframe()).all().all()

    # No final estimator passed.
    _assert_valid_predictions(StackedEnsembleClassifier(input_pipelines=input_pipelines, n_jobs=1))

    # Explicit final estimator.
    _assert_valid_predictions(StackedEnsembleClassifier(input_pipelines=input_pipelines,
                                                        final_estimator=RandomForestClassifier(),
                                                        n_jobs=1))
def test_stacked_ensemble_multilevel(logistic_regression_binary_pipeline_class):
    """Check that a stacked ensemble classifier can be passed as another ensemble's final estimator."""
    # 50 rows of 5 random features with an alternating 1/0 target.
    features = pd.DataFrame(np.random.rand(50, 5))
    target = pd.Series([1, 0] * 25)

    inner_ensemble = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        n_jobs=1,
    )
    outer_ensemble = StackedEnsembleClassifier(
        input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})],
        final_estimator=inner_ensemble,
        n_jobs=1,
    )
    outer_ensemble.fit(features, target)

    predictions = outer_ensemble.predict(features)
    assert len(predictions) == len(target)
    # Passes as long as the predictions are not entirely NaN.
    every_prediction_is_nan = np.isnan(predictions.to_series()).all()
    assert not every_prediction_is_nan
def test_stacked_ensemble_n_jobs_negative_one(X_y_binary, logistic_regression_binary_pipeline_class):
    """Build the ensemble with ``n_jobs=-1``, verify its parameters, then fit and predict."""
    X, y = X_y_binary
    pipelines = [logistic_regression_binary_pipeline_class(parameters={})]
    ensemble = StackedEnsembleClassifier(input_pipelines=pipelines, n_jobs=-1)

    # Arguments that were not passed are expected to be reported as None.
    assert ensemble.parameters == {
        "input_pipelines": pipelines,
        "final_estimator": None,
        "cv": None,
        "n_jobs": -1,
    }

    ensemble.fit(X, y)
    predictions = ensemble.predict(X)
    assert len(predictions) == len(y)
    # Passes as long as the predictions are not entirely NaN.
    every_prediction_is_nan = np.isnan(predictions.to_series()).all()
    assert not every_prediction_is_nan
def test_stacked_ensemble_init_with_multiple_same_estimators(X_y_binary, logistic_regression_binary_pipeline_class):
    """Check that several input pipelines of the same type are accepted by the ensemble."""
    X, y = X_y_binary
    # Two separate instances of the same pipeline class.
    pipelines = [
        logistic_regression_binary_pipeline_class(parameters={}),
        logistic_regression_binary_pipeline_class(parameters={}),
    ]
    ensemble = StackedEnsembleClassifier(input_pipelines=pipelines, n_jobs=1)

    assert ensemble.parameters == {
        "input_pipelines": pipelines,
        "final_estimator": None,
        "cv": None,
        "n_jobs": 1,
    }

    # fit() is expected to hand back a StackedEnsembleClassifier instance.
    fit_result = ensemble.fit(X, y)
    assert isinstance(fit_result, StackedEnsembleClassifier)

    predictions = ensemble.predict(X)
    assert len(predictions) == len(y)
    # Passes as long as the predictions are not entirely NaN.
    every_prediction_is_nan = np.isnan(predictions.to_series()).all()
    assert not every_prediction_is_nan