示例#1
0
    def test_model_selection_with_pipeline_works_as_expected(
        self,
        pipeline_logistic: Pipeline,
        pipeline_dummy_classifier: Pipeline,
        train_iris_dataset,
    ):
        """Pipelines can be passed as candidates to ``Model.test_estimators``.

        With accuracy as the metric, the logistic pipeline is expected to be
        returned as the best estimator over the dummy-classifier pipeline.
        """
        candidates = [pipeline_logistic, pipeline_dummy_classifier]
        winner, _results = Model.test_estimators(
            data=train_iris_dataset,
            estimators=candidates,
            metrics="accuracy",
        )

        # The first candidate is the logistic pipeline fixture itself.
        assert winner.estimator == pipeline_logistic
示例#2
0
 def test_model_selection_with_nonstandard_metric_works_as_expected(
         self, train_iris_dataset):
     """Every result reports the requested ``roc_auc`` metric."""
     candidates = [
         LogisticRegression(solver="liblinear"),
         RandomForestClassifier(n_estimators=10),
     ]
     _winner, outcomes = Model.test_estimators(
         train_iris_dataset, candidates, metrics="roc_auc")

     # Each returned result must contain the metric that was asked for.
     assert all("roc_auc" in outcome.metrics for outcome in outcomes)
示例#3
0
    def test_model_selection_refits_final_model(self, train_iris_dataset):
        """With ``refit=True`` the winner matches a direct fit on the train split.

        A ``LogisticRegression`` fitted by hand on ``train_x`` / ``train_y`` is
        used as the reference; its coefficients must equal those of the
        estimator returned by ``Model.test_estimators``.
        """
        candidates = [LogisticRegression(solver="liblinear")]

        # Reference model, fitted directly on the training data.
        features = train_iris_dataset.train_x
        target = train_iris_dataset.train_y
        reference = LogisticRegression(solver="liblinear").fit(features, target)

        selected, _results = Model.test_estimators(
            train_iris_dataset,
            candidates,
            cv=2,
            refit=True,
            metrics="accuracy",
        )

        coefficients_match = reference.coef_ == selected.estimator.coef_
        assert np.all(coefficients_match)
示例#4
0
    def test_model_selection_works_with_default_metric(self,
                                                       train_iris_dataset):
        """Omitting ``metrics`` scores every candidate with ``accuracy``.

        The random forest (second candidate) is expected to be selected, and
        the first metric of each of the two results must be named "accuracy".
        """
        logistic = LogisticRegression(solver="liblinear")
        forest = RandomForestClassifier(n_estimators=2)
        winner, outcomes = Model.test_estimators(train_iris_dataset,
                                                 [logistic, forest])

        assert forest is winner.estimator
        assert len(outcomes) == 2
        for position in (0, 1):
            assert outcomes[position].metrics[0].name == "accuracy"
示例#5
0
    def test_test_models_logs_when_given_dir(self, tmp_path: pathlib.Path,
                                             train_iris_dataset):
        """``Model.test_estimators`` writes YAML logs below ``log_dir``.

        Runs model selection with ``log_dir`` pointing into ``tmp_path`` and
        checks that every ``*.yaml`` file found there carries one of the two
        expected ``model_name`` values.
        """
        test_models_log = tmp_path / "test_estimators"
        Model.test_estimators(
            train_iris_dataset,
            [
                RandomForestClassifier(n_estimators=10),
                DummyClassifier(strategy="prior"),
            ],
            log_dir=str(test_models_log),
            metrics="accuracy",
        )

        log_files = list(test_models_log.rglob("*.yaml"))
        # Guard against a vacuous pass: if nothing was logged, the loop below
        # would never execute and the test would succeed without checking
        # anything.
        assert log_files, f"no YAML logs written under {test_models_log}"

        expected_names = {
            "IrisData_RandomForestClassifier",
            "IrisData_DummyClassifier",
        }
        for log_file in log_files:
            with log_file.open() as f:
                result = yaml.safe_load(f)
            assert result["model_name"] in expected_names
示例#6
0
    def test_model_selection_works_with_multiple_metrics(
            self, train_iris_dataset):
        """A list of metrics yields one metric entry per name on each result.

        The random forest (second candidate) is expected to be selected, and
        both results must carry exactly two metrics.
        """
        logistic = LogisticRegression(solver="liblinear")
        forest = RandomForestClassifier(n_estimators=2)
        requested_metrics = ["accuracy", "roc_auc"]
        winner, outcomes = Model.test_estimators(
            train_iris_dataset, [logistic, forest], metrics=requested_metrics)

        assert forest is winner.estimator
        assert len(outcomes) == 2
        for position in (0, 1):
            assert len(outcomes[position].metrics) == 2
示例#7
0
 def test_model_selection_works_as_expected(self, train_iris_dataset):
     """Model selection returns the best estimator plus ordered results.

     The random forest (second candidate) is expected to be selected; the
     two results must be ``Result`` instances with the first scoring at
     least as high as the second.
     """
     logistic = LogisticRegression(solver="liblinear")
     forest = RandomForestClassifier(n_estimators=10)
     winner, ranking = Model.test_estimators(
         train_iris_dataset, [logistic, forest], metrics="accuracy")

     assert forest is winner.estimator
     assert len(ranking) == 2

     top_score = ranking[0].metrics[0].score
     runner_up_score = ranking[1].metrics[0].score
     assert top_score >= runner_up_score

     assert all(isinstance(entry, Result) for entry in ranking)
示例#8
0
 def test_model_selection_works_with_feature_pipeline(
         self, train_iris_dataset: Dataset):
     """A ``feature_pipeline`` is combined with the winning estimator.

     The best estimator's parameters must equal those of a two-step
     pipeline: the feature pipeline under "features" followed by the first
     candidate under "estimator".
     """
     forest = RandomForestClassifier()
     dummy = DummyClassifier(strategy="stratified")
     scaling = Pipeline([("scale", DFStandardScaler())])

     winner, _results = Model.test_estimators(
         data=train_iris_dataset,
         estimators=[forest, dummy],
         feature_pipeline=scaling,
     )

     reference = Pipeline([("features", scaling), ("estimator", forest)])
     assert winner.estimator.get_params() == reference.get_params()