Exemplo n.º 1
0
def test_all_estimators_check_fit(X_y_binary, ts_data, test_estimator_needs_fitting_false, helper_functions):
    """Verify estimators raise before ``fit`` and work normally afterwards.

    Every estimator class returned by ``_all_estimators()`` is checked, except
    the stacked ensembles and the time series baseline, plus the extra class
    supplied by the ``test_estimator_needs_fitting_false`` fixture. Classes
    whose ``needs_fitting`` is falsy are skipped.

    For each remaining class, ``predict``, ``predict_proba`` (classifiers
    only) and ``feature_importance`` must raise ``ComponentNotYetFittedError``
    on a fresh instance, and must run without raising once ``fit`` was called.
    """
    excluded = [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineEstimator]
    candidates = [est for est in _all_estimators() if est not in excluded]
    candidates.append(test_estimator_needs_fitting_false)

    def _is_classifier(comp):
        # Only binary/multiclass components are exercised through predict_proba.
        problem_types = comp.supported_problem_types
        return ProblemTypes.BINARY in problem_types or ProblemTypes.MULTICLASS in problem_types

    for component_class in candidates:
        if not component_class.needs_fitting:
            continue

        # Estimators that support time series regression get the time series data.
        wants_ts_data = ProblemTypes.TIME_SERIES_REGRESSION in component_class.supported_problem_types
        X, y = ts_data if wants_ts_data else X_y_binary

        component = helper_functions.safe_init_component_with_njobs_1(component_class)
        not_fitted = f'You must fit {component_class.__name__}'

        # Before fitting: every prediction-related entry point must refuse to run.
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
            component.predict(X)
        if _is_classifier(component):
            with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
                component.predict_proba(X)
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
            component.feature_importance

        component.fit(X, y)

        # After fitting: the same calls must complete without raising.
        if _is_classifier(component):
            component.predict_proba(X)
        component.predict(X)
        component.feature_importance
Exemplo n.º 2
0
def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression, ts_data):
    """Fit and predict through the scikit-learn wrapper for each estimator.

    For every non-ensemble estimator and each of its supported problem types
    (time series problem types are skipped), a single-estimator pipeline is
    built, wrapped with ``scikit_learn_wrapped_estimator``, fitted, and its
    predictions are checked for length and for not being entirely NaN.
    Classification problems additionally check the ``predict_proba`` shape.
    """
    time_series_types = [ProblemTypes.TIME_SERIES_REGRESSION, ProblemTypes.TIME_SERIES_MULTICLASS, ProblemTypes.TIME_SERIES_BINARY]
    classification_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    non_ensemble_estimators = [est for est in _all_estimators() if est.model_family != ModelFamily.ENSEMBLE]

    for estimator_class in non_ensemble_estimators:
        for problem_type in estimator_class.supported_problem_types:
            if problem_type in time_series_types:
                continue

            # Pick the dataset and pipeline class that match the problem type.
            if problem_type == ProblemTypes.BINARY:
                (X, y), num_classes = X_y_binary, 2
                pipeline_class = BinaryClassificationPipeline
            elif problem_type == ProblemTypes.MULTICLASS:
                (X, y), num_classes = X_y_multi, 3
                pipeline_class = MulticlassClassificationPipeline
            elif problem_type == ProblemTypes.REGRESSION:
                X, y = X_y_regression
                pipeline_class = RegressionPipeline

            wrapped = scikit_learn_wrapped_estimator(pipeline_class([estimator_class]))
            wrapped.fit(X, y)

            predictions = wrapped.predict(X)
            assert len(predictions) == len(y)
            assert not np.isnan(predictions).all()

            if problem_type in classification_types:
                probabilities = wrapped.predict_proba(X)
                assert probabilities.shape == (len(y), num_classes)
                assert not np.isnan(probabilities).all().all()
Exemplo n.º 3
0
def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression):
    """Exercise the scikit-learn wrapper around one-estimator pipelines.

    Iterates over every estimator whose model family is not ``ENSEMBLE`` and
    over each problem type it supports. A pipeline is built with
    ``make_pipeline_from_components`` from a default-constructed estimator,
    wrapped, fitted and used to predict; classification problem types also
    check the shape of the ``predict_proba`` output.
    """
    skipped_problem_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]

    for estimator_class in _all_estimators():
        if estimator_class.model_family == ModelFamily.ENSEMBLE:
            continue

        for problem_type in estimator_class.supported_problem_types:
            if problem_type == ProblemTypes.BINARY:
                X, y = X_y_binary
                num_classes = 2
            elif problem_type == ProblemTypes.MULTICLASS:
                X, y = X_y_multi
                num_classes = 3
            elif problem_type == ProblemTypes.REGRESSION:
                X, y = X_y_regression
            elif problem_type in skipped_problem_types:
                # Skipping because make_pipeline_from_components does not yet work for time series.
                continue

            pipeline = make_pipeline_from_components([estimator_class()], problem_type)
            wrapper = scikit_learn_wrapped_estimator(pipeline)
            wrapper.fit(X, y)

            expected_rows = len(y)
            y_pred = wrapper.predict(X)
            assert len(y_pred) == expected_rows
            assert not np.isnan(y_pred).all()

            is_classification = problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
            if is_classification:
                y_pred_proba = wrapper.predict_proba(X)
                assert y_pred_proba.shape == (expected_rows, num_classes)
                assert not np.isnan(y_pred_proba).all().all()
Exemplo n.º 4
0
def test_all_transformers_needs_fitting():
    """Check the ``needs_fitting`` flag on every transformer and estimator.

    Only the components named in ``fit_free_names`` are expected to have a
    falsy ``needs_fitting``; every other component class must have it truthy.
    """
    fit_free_names = ('DropColumns', 'SelectColumns', 'DelayedFeatureTransformer')
    for component_class in _all_transformers() + _all_estimators():
        if component_class.__name__ not in fit_free_names:
            assert component_class.needs_fitting
            continue
        assert not component_class.needs_fitting
Exemplo n.º 5
0
def stackable_regressors(helper_functions):
    """Return instances of every regressor that can be used in a stacked ensemble.

    An estimator class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly regression and time series
    regression, and its model family is neither listed in
    ``_nonstackable_model_families`` nor ``ModelFamily.ENSEMBLE``.

    Each qualifying class is instantiated through
    ``helper_functions.safe_init_component_with_njobs_1``.
    """
    regression_types = {ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION}
    regressors = []
    for estimator_class in _all_estimators():
        handled_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        if handled_types != regression_types:
            continue
        family = estimator_class.model_family
        if family in _nonstackable_model_families or family == ModelFamily.ENSEMBLE:
            continue
        regressors.append(helper_functions.safe_init_component_with_njobs_1(estimator_class))
    return regressors
Exemplo n.º 6
0
def stackable_classifiers(helper_functions):
    """Return every classifier class that can be used in a stacked ensemble.

    A class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly binary, multiclass and their time
    series counterparts, and its model family is neither listed in
    ``_nonstackable_model_families`` nor ``ModelFamily.ENSEMBLE``.

    The returned list holds the classes themselves, not instances;
    ``helper_functions`` is accepted but not used.
    """
    classification_types = {
        ProblemTypes.BINARY,
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    }

    def _is_stackable(estimator_class):
        handled_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        if handled_types != classification_types:
            return False
        if estimator_class.model_family in _nonstackable_model_families:
            return False
        return estimator_class.model_family != ModelFamily.ENSEMBLE

    return [estimator_class for estimator_class in _all_estimators() if _is_stackable(estimator_class)]
Exemplo n.º 7
0
def all_pipeline_classes():
    """Build one mock pipeline per (estimator, supported problem type) pair.

    The two stacked ensemble estimators are left out. Results are produced by
    ``create_mock_pipeline`` and returned as a list, ordered by estimator and
    then by that estimator's supported problem types.
    """
    stacked_ensembles = (StackedEnsembleClassifier, StackedEnsembleRegressor)
    return [
        create_mock_pipeline(estimator_class, problem_type)
        for estimator_class in _all_estimators()
        if estimator_class not in stacked_ensembles
        for problem_type in estimator_class.supported_problem_types
    ]
Exemplo n.º 8
0
def test_serialization_protocol(mock_cloudpickle_dump, tmpdir):
    """Check which pickle protocol ``save`` forwards to the mocked dump call.

    Without an explicit ``pickle_protocol`` the ``protocol`` keyword passed to
    the mock must equal ``cloudpickle.DEFAULT_PROTOCOL``; with
    ``pickle_protocol=42`` it must equal 42.
    """
    def _assert_dumped_once_with(expected_protocol):
        # Exactly one dump call, carrying the expected ``protocol`` keyword.
        recorded_calls = mock_cloudpickle_dump.call_args_list
        assert len(recorded_calls) == 1
        _, dump_kwargs = recorded_calls[0]
        assert dump_kwargs['protocol'] == expected_protocol

    target = os.path.join(str(tmpdir), 'pipe.pkl')
    classifier = LogisticRegressionClassifier()

    classifier.save(target)
    _assert_dumped_once_with(cloudpickle.DEFAULT_PROTOCOL)

    # Forget the first call so the second save is counted on its own.
    mock_cloudpickle_dump.reset_mock()

    classifier.save(target, pickle_protocol=42)
    _assert_dumped_once_with(42)


@pytest.mark.parametrize("estimator_class", _all_estimators())
def test_estimators_accept_all_kwargs(estimator_class,
                                      logistic_regression_binary_pipeline_class,
                                      linear_regression_pipeline_class):
    try:
        estimator = estimator_class()
    except EnsembleMissingPipelinesError:
        if estimator_class == StackedEnsembleClassifier:
            estimator = estimator_class(input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})])
        elif estimator_class == StackedEnsembleRegressor:
            estimator = estimator_class(input_pipelines=[linear_regression_pipeline_class(parameters={})])
    if estimator._component_obj is None:
        pytest.skip(f"Skipping {estimator_class} because does not have component object.")
    if estimator_class.model_family == ModelFamily.ENSEMBLE:
        params = estimator.parameters
    else: