Python AutoMLSearch.AutoMLSearch примеры, evalml.AutoMLSearch.AutoMLSearch Python примеры использования

Пример #1

0

Показать файл

def test_init(X_y_regression):
    """Run a short regression search and check the basic result accessors.

    The search is executed twice with the same options: first with the
    fixture data passed by keyword, then with that data wrapped in pandas
    objects and passed positionally.
    """
    X, y = X_y_regression
    search_options = dict(problem_type='regression',
                          objective="R2",
                          max_iterations=3,
                          n_jobs=1)

    # Pass 1: fixture data supplied through X_train / y_train.
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    # The selected pipeline must be usable for prediction.
    automl.best_pipeline.predict(X)

    # Pass 2: the same data as a DataFrame / Series, given positionally.
    features = pd.DataFrame(X)
    target = pd.Series(y)
    automl = AutoMLSearch(features, target, **search_options)
    automl.search()

    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)
    assert isinstance(automl.get_pipeline(0), PipelineBase)

Пример #2

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_init(X_y_binary):
    """Run a one-iteration binary search and check the basic result accessors.

    The search is executed twice with the same options: first with the
    fixture data passed by keyword, then with that data wrapped in pandas
    objects and passed positionally.
    """
    X, y = X_y_binary
    search_options = dict(problem_type='binary', max_iterations=1, n_jobs=1)

    # Pass 1: fixture data supplied through X_train / y_train.
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)

    # Pass 2: the same data as a DataFrame / Series, given positionally.
    features = pd.DataFrame(X)
    target = pd.Series(y)
    automl = AutoMLSearch(features, target, **search_options)
    automl.search()

    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    assert isinstance(automl.get_pipeline(0), PipelineBase)
    # No objective was passed above, so this pins the one the search ended up with.
    assert automl.objective.name == 'Log Loss Binary'
    automl.best_pipeline.predict(X)

Пример #3

0

Показать файл

Файл: test_automl_search_classification.py Проект: actuarial-tools/evalml

def test_non_optimizable_threshold_multi(mock_fit, mock_score, X_y_multi):
    """Check that a multiclass best pipeline exposes no ``threshold`` attribute.

    The same search is run without and then with ``optimize_thresholds=True``;
    in both cases reading ``best_pipeline.threshold`` must raise AttributeError.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this excerpt -- confirm against the original file.
    """
    mock_score.return_value = {"Log Loss Multiclass": 0.5}
    X, y = X_y_multi

    # First run omits optimize_thresholds entirely; second run enables it.
    for extra_options in ({}, {'optimize_thresholds': True}):
        automl = AutoMLSearch(X_train=X,
                              y_train=y,
                              problem_type='multiclass',
                              objective='Log Loss Multiclass',
                              max_iterations=1,
                              **extra_options)
        automl.search()
        mock_fit.assert_called()
        mock_score.assert_called()
        with pytest.raises(AttributeError):
            # Bare attribute access is intentional: the lookup itself must fail.
            automl.best_pipeline.threshold

Пример #4

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_data_splitter(X_y_binary):
    """Check that a custom ``data_splitter`` controls the number of CV folds.

    Each splitter class is built with ``n_splits=cv_folds``; after the search
    the first pipeline's ``cv_data`` must contain exactly that many entries.
    """
    X, y = X_y_binary
    cv_folds = 5

    for splitter_class in (BalancedClassificationDataCVSplit, TimeSeriesSplit):
        automl = AutoMLSearch(X_train=X,
                              y_train=y,
                              problem_type='binary',
                              data_splitter=splitter_class(n_splits=cv_folds),
                              max_iterations=1,
                              n_jobs=1)
        automl.search()

        assert isinstance(automl.rankings, pd.DataFrame)
        first_pipeline_cv = automl.results['pipeline_results'][0]["cv_data"]
        assert len(first_pipeline_cv) == cv_folds

Пример #5

0

Показать файл

def test_early_stopping(caplog, logistic_regression_binary_pipeline_class, X_y_binary):
    """Check early-stopping argument validation and the stopping log message.

    First verifies that an invalid ``patience`` or ``tolerance`` makes the
    constructor raise ValueError. Then injects hand-built results into a valid
    search object and checks that ``_check_stopping_condition`` logs the
    early-stop message.
    """
    X, y = X_y_binary
    with pytest.raises(ValueError, match='patience value must be a positive integer.'):
        # The result is deliberately not bound: the constructor is expected to raise.
        AutoMLSearch(X_train=X, y_train=y, problem_type='binary', objective='AUC', max_iterations=5,
                     allowed_model_families=['linear_model'], patience=-1, random_seed=0)

    with pytest.raises(ValueError, match='tolerance value must be'):
        AutoMLSearch(X_train=X, y_train=y, problem_type='binary', objective='AUC', max_iterations=5,
                     allowed_model_families=['linear_model'], patience=1, tolerance=1.5, random_seed=0)

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', objective='AUC', max_iterations=5,
                          allowed_model_families=['linear_model'], patience=2, tolerance=0.05,
                          random_seed=0, n_jobs=1)

    # 0.96 is only 1% greater than 0.95, so with tolerance=0.05 it is not
    # treated as an improvement and the patience counter keeps running.
    scores = [0.95, 0.84, 0.96]
    search_order = [0, 1, 2]
    mock_results = {
        'search_order': search_order,
        'pipeline_results': {
            pipeline_id: {
                'score': scores[pipeline_id],
                'pipeline_class': logistic_regression_binary_pipeline_class,
            }
            for pipeline_id in search_order
        },
    }

    # Bypass a real search: feed the fabricated results straight into the object.
    automl._results = mock_results
    automl._check_stopping_condition(time.time())
    out = caplog.text
    assert "2 iterations without improvement. Stopping search early." in out

Пример #6

0

Показать файл

Файл: test_automl_search_regression.py Проект: baagie7/evalml

def test_log_metrics_only_passed_directly(X_y_regression):
    """Check that log-error objectives are accepted as instances but not by name.

    Passing the objective names as strings must raise ObjectiveNotFoundError,
    while passing objective instances must succeed and keep their order.
    """
    X, y = X_y_regression
    common_options = dict(X_train=X, y_train=y, problem_type='regression')

    objectives_by_name = ['RootMeanSquaredLogError', 'MeanSquaredLogError']
    with pytest.raises(ObjectiveNotFoundError, match="RootMeanSquaredLogError is not a valid Objective!"):
        AutoMLSearch(additional_objectives=objectives_by_name, **common_options)

    objective_instances = [RootMeanSquaredLogError(), MeanSquaredLogError()]
    ar = AutoMLSearch(additional_objectives=objective_instances, **common_options)

    first_objective = ar.additional_objectives[0]
    assert first_objective.name == 'Root Mean Squared Log Error'
    second_objective = ar.additional_objectives[1]
    assert second_objective.name == 'Mean Squared Log Error'

Пример #7

0

Показать файл

Файл: test_automl_search_classification.py Проект: joalmjoalm/evalml

def test_random_seed(X_y_binary):
    """Check that two binary searches with the same ``random_seed`` rank identically."""
    X, y = X_y_binary

    def run_seeded_search():
        # A fresh Precision() instance is created for every run.
        search = AutoMLSearch(X_train=X,
                              y_train=y,
                              problem_type='binary',
                              objective=Precision(),
                              max_iterations=5,
                              random_seed=0,
                              n_jobs=1)
        search.search()
        return search

    automl = run_seeded_search()
    automl_1 = run_seeded_search()
    assert automl.rankings.equals(automl_1.rankings)

Пример #8

0

Показать файл

Файл: test_automl_search_regression.py Проект: baagie7/evalml

def test_random_seed(X_y_regression):
    """Check that two regression searches with the same ``random_seed`` rank the same.

    Raises:
        AssertionError: raised by ``pd.testing.assert_frame_equal`` when the
            two rankings frames differ beyond its default tolerance.
    """
    X, y = X_y_regression
    search_options = dict(X_train=X, y_train=y, problem_type='regression', objective="R2",
                          max_iterations=5, random_seed=0, n_jobs=1)

    automl = AutoMLSearch(**search_options)
    automl.search()

    automl_1 = AutoMLSearch(**search_options)
    automl_1.search()

    # Use assert_frame_equal rather than DataFrame.equals because R2 could differ
    # at the 10+ decimal. It raises on mismatch by itself, so it is called
    # directly: wrapping it in `assert ... is None` adds nothing and would be
    # removed entirely when Python runs with -O.
    pd.testing.assert_frame_equal(automl.rankings, automl_1.rankings)

Пример #9

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_init_objective(X_y_binary):
    """Check that ``objective`` resolves to a Precision instance.

    The objective is given once as an instance and once by name; in both
    cases ``automl.objective`` must be a ``Precision`` object.
    """
    X, y = X_y_binary
    for objective_spec in (Precision(), 'Precision'):
        automl = AutoMLSearch(X_train=X,
                              y_train=y,
                              problem_type='binary',
                              objective=objective_spec,
                              max_iterations=1)
        assert isinstance(automl.objective, Precision)

Пример #10

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_plot_iterations_ipython_mock_import_failure(mock_ipython_display,
                                                     X_y_binary):
    """Check that the iteration plot is still produced when display fails.

    ``mock_ipython_display`` is made to raise ImportError; the interactive
    plot call must still return a plotly Figure holding three points.

    ``mock_ipython_display`` is presumably injected by a patch decorator that
    is not visible in this excerpt -- confirm against the original file.
    """
    # Skip the test when the optional plotting dependencies are unavailable.
    pytest.importorskip(
        'IPython.display',
        reason='Skipping plotting test because ipywidgets not installed')
    go = pytest.importorskip(
        'plotly.graph_objects',
        reason='Skipping plotting test because plotly not installed')

    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective="f1",
                          max_iterations=3,
                          n_jobs=1)
    automl.search()

    # Simulate the display call blowing up with an import failure.
    mock_ipython_display.side_effect = ImportError('KABOOOOOOMMMM')
    plot = automl.plot.search_iteration_plot(interactive_plot=True)
    mock_ipython_display.assert_called_once()

    assert isinstance(plot, go.Figure)
    assert isinstance(plot.data, tuple)

    first_trace = plot.data[0]
    x_values = pd.Series(first_trace['x'])
    y_values = pd.Series(first_trace['y'])
    assert x_values.is_monotonic_increasing
    assert y_values.is_monotonic_increasing
    # One point per iteration (max_iterations=3).
    assert len(x_values) == 3
    assert len(y_values) == 3

Пример #11

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_optimizable_threshold_disabled(mock_fit, mock_score,
                                        mock_predict_proba,
                                        mock_encode_targets,
                                        mock_optimize_threshold, X_y_binary):
    """Check that thresholds stay at 0.5 when ``optimize_thresholds=False``.

    With optimization disabled, neither ``predict_proba`` nor the threshold
    optimizer may be called, and the best pipeline as well as the first three
    CV folds must report a threshold of 0.5.

    The ``mock_*`` arguments are presumably injected by patch decorators that
    are not visible in this excerpt -- confirm against the original file.
    """
    # The optimizer would return 0.8 if it were (wrongly) invoked.
    mock_optimize_threshold.return_value = 0.8
    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective='precision',
                          max_iterations=1,
                          optimize_thresholds=False)
    mock_score.return_value = {automl.objective.name: 1.0}
    automl.search()

    mock_fit.assert_called()
    mock_score.assert_called()
    assert not mock_predict_proba.called
    assert not mock_optimize_threshold.called

    expected_threshold = 0.5
    assert automl.best_pipeline.threshold == expected_threshold
    for fold_index in range(3):
        fold = automl.results['pipeline_results'][0]['cv_data'][fold_index]
        assert fold.get('binary_classification_threshold') == expected_threshold

Пример #12

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_optimizable_threshold_enabled(mock_fit, mock_score,
                                       mock_predict_proba, mock_encode_targets,
                                       mock_optimize_threshold, X_y_binary,
                                       caplog):
    """Check that the optimized threshold is used when ``optimize_thresholds=True``.

    The mocked optimizer returns 0.8; the best pipeline and the first three CV
    folds must report that value, and ``describe_pipeline`` must log the
    threshold-objective line.

    The ``mock_*`` arguments are presumably injected by patch decorators that
    are not visible in this excerpt -- confirm against the original file.
    """
    expected_threshold = 0.8
    mock_optimize_threshold.return_value = expected_threshold
    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective='precision',
                          max_iterations=1,
                          optimize_thresholds=True)
    mock_score.return_value = {'precision': 1.0}
    automl.search()

    for mocked in (mock_fit, mock_score, mock_predict_proba,
                   mock_optimize_threshold):
        mocked.assert_called()

    assert automl.best_pipeline.threshold == expected_threshold
    for fold_index in range(3):
        fold = automl.results['pipeline_results'][0]['cv_data'][fold_index]
        assert fold.get('binary_classification_threshold') == expected_threshold

    automl.describe_pipeline(0)
    logged = caplog.text
    assert "Objective to optimize binary classification pipeline thresholds for" in logged

Пример #13

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_callback(X_y_binary):
    """Check how often the search callbacks fire during a binary search.

    Both callbacks only increment a counter. After the search the start
    callback count must equal ``len(get_estimators('binary')) + 1`` and the
    add-result callback count must equal ``max_iterations``.
    """
    X, y = X_y_binary
    max_iterations = 3
    call_counts = {"start_iteration_callback": 0, "add_result_callback": 0}

    # The counter dict is bound as a default argument of each callback.
    def start_iteration_callback(pipeline_class,
                                 parameters,
                                 automl_obj,
                                 counts=call_counts):
        counts["start_iteration_callback"] += 1

    def add_result_callback(results,
                            trained_pipeline,
                            automl_obj,
                            counts=call_counts):
        counts["add_result_callback"] += 1

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective=Precision(),
                          max_iterations=max_iterations,
                          start_iteration_callback=start_iteration_callback,
                          add_result_callback=add_result_callback,
                          n_jobs=1)
    automl.search()

    # NOTE(review): presumably one start per binary estimator plus one extra
    # (baseline?) pipeline -- confirm against AutoMLSearch batching.
    expected_start_calls = len(get_estimators('binary')) + 1
    assert call_counts["start_iteration_callback"] == expected_start_calls
    assert call_counts["add_result_callback"] == max_iterations

Пример #14

0

Показать файл

def test_automl_time_series_classification_pickle_generated_pipeline(
        mock_binary_fit, mock_multi_fit, mock_binary_score,
        mock_multiclass_score, problem_type, X_y_binary, X_y_multi):
    """Check that generated time series classification pipelines can be pickled.

    Every ranked pipeline must be of the generated class matching the problem
    type and must survive a pickle round trip.

    ``problem_type`` and the ``mock_*`` arguments presumably come from
    parametrize / patch decorators that are not visible in this excerpt --
    confirm against the original file.
    """
    is_binary = problem_type == ProblemTypes.TIME_SERIES_BINARY
    X, y = X_y_binary if is_binary else X_y_multi
    pipeline = (GeneratedPipelineTimeSeriesBinary
                if is_binary else GeneratedPipelineTimeSeriesMulticlass)

    configuration = dict(gap=0,
                         max_delay=0,
                         delay_target=False,
                         delay_features=True)
    a = AutoMLSearch(X_train=X,
                     y_train=y,
                     problem_type=problem_type,
                     problem_configuration=configuration)
    a.search()

    for _, ranking_row in a.rankings.iterrows():
        pipeline_id = ranking_row['id']
        assert a.get_pipeline(pipeline_id).__class__ == pipeline
        # Round trip through pickle; the data originates from this test itself.
        assert pickle.loads(pickle.dumps(a.get_pipeline(pipeline_id)))

Пример #15

0

Показать файл

def test_callback(X_y_regression):
    """Check that both search callbacks fire once per iteration.

    Both callbacks only increment a counter; after the search each counter
    must equal ``max_iterations``.
    """
    X, y = X_y_regression
    max_iterations = 3
    call_counts = {"start_iteration_callback": 0, "add_result_callback": 0}

    # The counter dict is bound as a default argument of each callback.
    def start_iteration_callback(pipeline_class,
                                 parameters,
                                 automl_obj,
                                 counts=call_counts):
        counts["start_iteration_callback"] += 1

    def add_result_callback(results,
                            trained_pipeline,
                            automl_obj,
                            counts=call_counts):
        counts["add_result_callback"] += 1

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='regression',
                          objective="R2",
                          max_iterations=max_iterations,
                          start_iteration_callback=start_iteration_callback,
                          add_result_callback=add_result_callback,
                          n_jobs=1)
    automl.search()

    assert call_counts["start_iteration_callback"] == max_iterations
    assert call_counts["add_result_callback"] == max_iterations

Пример #16

0

Показать файл

def test_automl_supports_time_series_regression(mock_fit, mock_score,
                                                X_y_regression):
    """Check the splitter, baseline, and parameters of a time series regression search.

    The search must use a ``TimeSeriesSplit`` data splitter, the result with
    id 0 must be the time series baseline regression pipeline, and every other
    result must carry the problem configuration in its parameters.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this excerpt -- confirm against the original file.
    """
    X, y = X_y_regression
    configuration = dict(gap=0,
                         max_delay=0,
                         delay_target=False,
                         delay_features=True)

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type="time series regression",
                          problem_configuration=configuration,
                          max_batches=2)
    automl.search()
    assert isinstance(automl.data_splitter, TimeSeriesSplit)

    for result in automl.results['pipeline_results'].values():
        if result["id"] != 0:
            assert result['parameters']['Delayed Feature Transformer'] == configuration
            assert result['parameters']['pipeline'] == configuration
        else:
            # Only the pipeline class is checked for the id-0 result.
            assert result['pipeline_class'] == TimeSeriesBaselineRegressionPipeline

Пример #17

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_automl_allowed_pipelines_init_allowed_both_not_specified_multi(
        mock_fit, mock_score, X_y_multi,
        assert_allowed_pipelines_equal_helper):
    """Check the allowed pipelines when neither pipelines nor families are given.

    With both ``allowed_pipelines`` and ``allowed_model_families`` set to None,
    the allowed pipelines must match one pipeline per multiclass estimator,
    both before and after the search.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this excerpt -- confirm against the original file.
    """
    X, y = X_y_multi
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='multiclass',
                          allowed_pipelines=None,
                          allowed_model_families=None)
    mock_score.return_value = {automl.objective.name: 1.0}

    multiclass_estimators = get_estimators(ProblemTypes.MULTICLASS,
                                           model_families=None)
    expected_pipelines = [
        make_pipeline(X, y, estimator, ProblemTypes.MULTICLASS)
        for estimator in multiclass_estimators
    ]

    # The allowed pipelines are checked before the search ...
    assert_allowed_pipelines_equal_helper(automl.allowed_pipelines,
                                          expected_pipelines)
    automl.search()
    # ... and must be unchanged afterwards.
    assert_allowed_pipelines_equal_helper(automl.allowed_pipelines,
                                          expected_pipelines)

    expected_families = {p.model_family for p in expected_pipelines}
    assert set(automl.allowed_model_families) == expected_families
    mock_fit.assert_called()
    mock_score.assert_called()

Пример #18

0

Показать файл

def test_early_stopping(caplog, linear_regression_pipeline_class,
                        X_y_regression):
    """Check that the early-stopping message is logged for a regression search.

    Hand-built results are injected into the search object and
    ``_check_stopping_condition`` is called directly; the captured log must
    contain the early-stop message.
    """
    X, y = X_y_regression
    tolerance = 0.005
    patience = 2
    # NOTE(review): sibling tests in this file pass `random_seed`; confirm that
    # `random_state` is the keyword expected by the evalml version under test.
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='regression',
                          objective='mse',
                          max_time='60 seconds',
                          patience=patience,
                          tolerance=tolerance,
                          allowed_model_families=['linear_model'],
                          random_state=0,
                          n_jobs=1)

    scores = [150, 200, 195]
    search_order = [0, 1, 2]
    mock_results = {
        'search_order': search_order,
        'pipeline_results': {
            pipeline_id: {
                'score': scores[pipeline_id],
                'pipeline_class': linear_regression_pipeline_class,
            }
            for pipeline_id in search_order
        },
    }

    # Bypass a real search: feed the fabricated results straight into the object.
    automl._results = mock_results
    automl._check_stopping_condition(time.time())
    out = caplog.text
    assert "2 iterations without improvement. Stopping search early." in out

Пример #19

0

Показать файл

Файл: test_automl_search_classification.py Проект: joalmjoalm/evalml

def test_automl_time_series_classification_threshold(mock_binary_fit, mock_binary_score, mock_predict_proba, mock_optimize_threshold, mock_split_data,
                                                     optimize, objective, X_y_binary):
    """Check threshold handling for time series binary searches.

    Expected outcome per case:
      * 'Log Loss Binary': optimizer and split_data unused, threshold is None.
      * 'F1' with ``optimize`` truthy: optimizer used, threshold is its 0.62.
      * 'F1' with ``optimize`` falsy: optimizer unused, threshold is 0.5.

    ``optimize``, ``objective`` and the ``mock_*`` arguments presumably come
    from parametrize / patch decorators that are not visible in this excerpt
    -- confirm against the original file.
    """
    X, y = X_y_binary
    mock_binary_score.return_value = {objective: 0.4}
    problem_type = 'time series binary'
    configuration = dict(gap=0, max_delay=0, delay_target=False, delay_features=True)

    mock_optimize_threshold.return_value = 0.62
    mock_split_data.return_value = split_data(X, y, problem_type, test_size=0.2, random_state=0)

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type=problem_type,
                          problem_configuration=configuration,
                          objective=objective,
                          optimize_thresholds=optimize,
                          max_batches=2)
    automl.search()
    assert isinstance(automl.data_splitter, TimeSeriesSplit)

    if objective == 'Log Loss Binary':
        mock_optimize_threshold.assert_not_called()
        assert automl.best_pipeline.threshold is None
        mock_split_data.assert_not_called()
    elif objective == 'F1':
        if optimize:
            mock_optimize_threshold.assert_called()
            assert automl.best_pipeline.threshold == 0.62
            mock_split_data.assert_called()
            # Third positional argument of the split_data call is the problem type.
            assert str(mock_split_data.call_args[0][2]) == problem_type
        else:
            mock_optimize_threshold.assert_not_called()
            assert automl.best_pipeline.threshold == 0.5
            mock_split_data.assert_not_called()

Пример #20

0

Показать файл

Файл: test_automl_search_classification.py Проект: joalmjoalm/evalml

def test_automl_supports_time_series_classification(mock_binary_fit, mock_multi_fit, mock_binary_score, mock_multiclass_score,
                                                    problem_type, X_y_binary, X_y_multi):
    """Check the splitter, baseline, and parameters of time series classification searches.

    The search must use a ``TimeSeriesSplit`` data splitter, the result with
    id 0 must be the matching time series baseline pipeline, and every other
    result must carry the problem configuration in its parameters.

    ``problem_type`` and the ``mock_*`` arguments presumably come from
    parametrize / patch decorators that are not visible in this excerpt --
    confirm against the original file.
    """
    if problem_type == ProblemTypes.TIME_SERIES_BINARY:
        X, y = X_y_binary
        baseline = TimeSeriesBaselineBinaryPipeline
        mock_binary_score.return_value = {"Log Loss Binary": 0.2}
        # From here on the problem type is passed as its string name.
        problem_type = 'time series binary'
    else:
        X, y = X_y_multi
        baseline = TimeSeriesBaselineMulticlassPipeline
        mock_multiclass_score.return_value = {"Log Loss Multiclass": 0.25}
        problem_type = 'time series multiclass'

    configuration = dict(gap=0, max_delay=0, delay_target=False, delay_features=True)

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type=problem_type,
                          problem_configuration=configuration,
                          max_batches=2)
    automl.search()
    assert isinstance(automl.data_splitter, TimeSeriesSplit)

    for result in automl.results['pipeline_results'].values():
        if result["id"] != 0:
            assert result['parameters']['Delayed Feature Transformer'] == configuration
            assert result['parameters']['pipeline'] == configuration
        else:
            # Only the pipeline class is checked for the id-0 result.
            assert result['pipeline_class'] == baseline

Пример #21

0

Показать файл

def test_lead_scoring_objective(X_y_binary):
    """Exercise the LeadScoring objective in a search and on its own.

    First runs a one-iteration search with the objective and calls fit,
    predict, predict_proba, and score on the best pipeline. Then checks
    ``decision_function`` and ``score`` directly on small hand-made inputs.
    """
    X, y = X_y_binary
    objective = LeadScoring(true_positives=1, false_positives=-1)

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective=objective,
                          max_iterations=1,
                          random_seed=0)
    automl.search()

    # Smoke-test the best pipeline end to end with the custom objective.
    pipeline = automl.best_pipeline
    pipeline.fit(X, y)
    pipeline.predict(X)
    pipeline.predict_proba(X)
    pipeline.score(X, y, [objective])

    # decision_function is called with a second argument of 1 for both input types.
    series_input = pd.Series([1, 10, .5, 5])
    out = objective.decision_function(series_input, 1)
    assert out.tolist() == [False, True, False, True]

    y_true = pd.Series([False, True, False, True])
    array_input = np.array([1, 10, .5, 5])
    out = objective.decision_function(array_input, 1)
    assert out.tolist() == y_true.to_list()

    score = objective.score(out, y_true)
    assert score == 0.5

Пример #22

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_automl_allowed_pipelines_specified_allowed_model_families_binary(
        mock_fit, mock_score, X_y_binary,
        assert_allowed_pipelines_equal_helper):
    """Check the allowed pipelines when only random forest is permitted.

    The model family is given once as a ``ModelFamily`` member and once as the
    string 'random_forest'; both must yield the same allowed pipelines before
    and after the search.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this excerpt -- confirm against the original file.
    """
    X, y = X_y_binary

    def build_expected_pipelines():
        # One pipeline per binary estimator in the random forest family.
        estimators = get_estimators(
            ProblemTypes.BINARY, model_families=[ModelFamily.RANDOM_FOREST])
        return [
            make_pipeline(X, y, estimator, ProblemTypes.BINARY)
            for estimator in estimators
        ]

    def search_and_verify(search, expected_pipelines):
        # Allowed pipelines must match before and after running the search.
        assert_allowed_pipelines_equal_helper(search.allowed_pipelines,
                                              expected_pipelines)
        search.search()
        assert_allowed_pipelines_equal_helper(search.allowed_pipelines,
                                              expected_pipelines)
        assert set(search.allowed_model_families) == {ModelFamily.RANDOM_FOREST}
        mock_fit.assert_called()
        mock_score.assert_called()

    # Run 1: family given as an enum member.
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          allowed_pipelines=None,
                          allowed_model_families=[ModelFamily.RANDOM_FOREST])
    mock_score.return_value = {automl.objective.name: 1.0}
    search_and_verify(automl, build_expected_pipelines())

    # Clear recorded calls so the second run's assert_called checks are meaningful.
    mock_fit.reset_mock()
    mock_score.reset_mock()

    # Run 2: family given by its string name.
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          allowed_pipelines=None,
                          allowed_model_families=['random_forest'])
    search_and_verify(automl, build_expected_pipelines())

Пример #23

0

Показать файл

def test_automl_allowed_pipelines_no_allowed_pipelines(X_y_regression):
    """Check that an empty model-family list makes the constructor raise ValueError."""
    X, y = X_y_regression
    search_options = dict(X_train=X,
                          y_train=y,
                          problem_type='regression',
                          allowed_pipelines=None,
                          allowed_model_families=[])
    with pytest.raises(ValueError, match="No allowed pipelines to search"):
        AutoMLSearch(**search_options)

Пример #24

0

Показать файл

def test_automl_allowed_pipelines_no_allowed_pipelines(automl_type, X_y_binary, X_y_multi):
    """Check that searching with no allowed model families raises ValueError.

    Here construction succeeds and leaves ``allowed_pipelines`` as None; the
    error is expected only when ``search()`` is called.

    ``automl_type`` presumably comes from a parametrize decorator that is not
    visible in this excerpt -- confirm against the original file.
    """
    if automl_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        problem_type = 'multiclass'
    else:
        X, y = X_y_binary
        problem_type = 'binary'

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type=problem_type,
                          allowed_pipelines=None,
                          allowed_model_families=[])
    assert automl.allowed_pipelines is None
    with pytest.raises(ValueError, match="No allowed pipelines to search"):
        automl.search()

Пример #25

0

Показать файл

Файл: test_automl_search_regression.py Проект: baagie7/evalml

def test_automl_time_series_regression_pickle_generated_pipeline(mock_fit, mock_score, X_y_regression):
    """Check that generated time series regression pipelines can be pickled.

    Every ranked pipeline must be a ``GeneratedPipelineTimeSeriesRegression``
    and must survive a pickle round trip.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this excerpt -- confirm against the original file.
    """
    X, y = X_y_regression
    configuration = dict(gap=0, max_delay=0, delay_target=False, delay_features=True)
    a = AutoMLSearch(X_train=X,
                     y_train=y,
                     problem_type="time series regression",
                     problem_configuration=configuration)
    a.search()

    for _, ranking_row in a.rankings.iterrows():
        pipeline_id = ranking_row['id']
        assert a.get_pipeline(pipeline_id).__class__ == GeneratedPipelineTimeSeriesRegression
        # Round trip through pickle; the data originates from this test itself.
        assert pickle.loads(pickle.dumps(a.get_pipeline(pipeline_id)))

Пример #26

0

Показать файл

Файл: test_automl_search_regression.py Проект: baagie7/evalml

def test_plot_disabled_missing_dependency(X_y_regression, has_minimal_dependencies):
    """Check that plotting is unavailable only with minimal dependencies.

    When ``has_minimal_dependencies`` is truthy, reading
    ``automl.plot.search_iteration_plot`` must raise AttributeError; otherwise
    the attribute access must succeed.
    """
    X, y = X_y_regression
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='regression', max_iterations=3)

    if not has_minimal_dependencies:
        # Bare attribute access is intentional: it must simply not raise.
        automl.plot.search_iteration_plot
        return

    with pytest.raises(AttributeError):
        automl.plot.search_iteration_plot

Пример #27

0

Показать файл

Файл: test_automl_search_classification.py Проект: joalmjoalm/evalml

def test_binary_auto(X_y_binary):
    """Check that the best binary pipeline is fitted and predicts two classes."""
    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective="Log Loss Binary",
                          max_iterations=5,
                          n_jobs=1)
    automl.search()

    best_pipeline = automl.best_pipeline
    assert best_pipeline._is_fitted

    y_pred = best_pipeline.predict(X)
    # The predictions must contain exactly two distinct values.
    distinct_predictions = np.unique(y_pred.to_series())
    assert len(distinct_predictions) == 2

Пример #28

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_recall_error(X_y_binary):
    """Check that using recall as the search objective raises ValueError."""
    X, y = X_y_binary
    search_options = dict(X_train=X,
                          y_train=y,
                          problem_type='binary',
                          objective='recall',
                          max_iterations=1)
    # Recall is a valid objective but it's not allowed in AutoML so a ValueError is expected
    error_msg = 'recall is not allowed in AutoML!'
    with pytest.raises(ValueError, match=error_msg):
        AutoMLSearch(**search_options)

Пример #29

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_categorical_classification(X_y_categorical_classification):
    """Check that a binary search on the categorical fixture yields at least one CV score."""
    X, y = X_y_categorical_classification
    search_options = dict(problem_type='binary',
                          objective="precision",
                          max_iterations=5,
                          n_jobs=1)
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    # At least one pipeline must have produced a non-null mean CV score.
    mean_cv_scores = automl.rankings["mean_cv_score"]
    assert not mean_cv_scores.isnull().all()

Пример #30

0

Показать файл

Файл: test_automl_search_classification.py Проект: sujala/evalml

def test_max_time(X_y_binary):
    """Check that a near-zero ``max_time`` still evaluates exactly one pipeline."""
    X, y = X_y_binary
    search_options = dict(problem_type='binary', max_time=1e-16, n_jobs=1)
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    # search will always run at least one pipeline
    pipeline_results = automl.results['pipeline_results']
    assert len(pipeline_results) == 1

Python AutoMLSearch.AutoMLSearch примеры использования