Пример #1
0
 def test_train_model_errors_correctly_when_not_scored(
     self,
     pipeline_logistic: Pipeline,
     tmp_path: pathlib.Path,
     train_iris_dataset,
 ):
     """Train then save without scoring, inside a ``log`` context.

     The train-and-save sequence is expected to raise ``MLToolingError``
     with a message telling the user the estimator has not been scored.
     """
     unscored = Model(pipeline_logistic)
     expect_not_scored = pytest.raises(
         MLToolingError, match="You haven't scored the estimator"
     )
     with expect_not_scored:
         with unscored.log(str(tmp_path)):
             unscored.train_estimator(train_iris_dataset)
             unscored.save_estimator(FileStorage(tmp_path))
Пример #2
0
 def test_randomsearch_can_log_with_context_manager(
         self, feature_union_classifier, train_iris_dataset, tmp_path):
     """Run a two-iteration random search while a ``log`` context is active."""
     search_space = {"estimator__penalty": ["l1", "l2"]}

     model = Model(feature_union_classifier)
     # Point run output at the per-test temporary directory.
     model.config.RUN_DIR = tmp_path

     with model.log("randomsearch_union_test"):
         # The two-target unpacking fails unless exactly two values come back.
         _first, _second = model.randomsearch(
             train_iris_dataset,
             param_distributions=search_space,
             n_iter=2,
         )
Пример #3
0
 def test_gridsearch_can_log_with_context_manager(
         self, feature_union_classifier, train_iris_dataset, tmp_path):
     """Run a grid search over the penalty while a ``log`` context is active."""
     penalty_grid = {"estimator__penalty": ["l1", "l2"]}

     model = Model(feature_union_classifier)
     # Point run output at the per-test temporary directory.
     model.config.RUN_DIR = tmp_path

     with model.log("gridsearch_union_test"):
         # The two-target unpacking fails unless exactly two values come back.
         _first, _second = model.gridsearch(
             train_iris_dataset,
             param_grid=penalty_grid,
         )
Пример #4
0
    def test_log_context_manager_works_as_expected(self, regression: Model):
        """Check config before, inside and after a ``log("test")`` context.

        Outside the context ``LOG`` is ``False`` and ``RUN_DIR`` is named
        ``runs``; inside it ``LOG`` is ``True`` and ``RUN_DIR`` is ``runs/test``.
        """
        # Before entering: logging off, default run directory.
        assert regression.config.LOG is False
        assert regression.config.RUN_DIR.name == "runs"

        with regression.log("test"):
            active_dir = regression.config.RUN_DIR
            assert regression.config.LOG is True
            assert active_dir.name == "test"
            assert active_dir.parent.name == "runs"

        # After leaving: both settings are back to their earlier values.
        restored_dir = regression.config.RUN_DIR
        assert regression.config.LOG is False
        assert restored_dir.name == "runs"
        assert "test" not in restored_dir.parts
Пример #5
0
    def test_save_estimator_saves_logging_dir_correctly(
        self,
        mock_hash: MagicMock,
        classifier: Model,
        tmp_path: pathlib.Path,
    ):
        """Save an estimator inside a ``log`` context and inspect the output.

        Verifies that the path returned by ``save_estimator`` exists, that the
        first ``*.yaml`` file found under ``tmp_path`` has ``LogisticRegression``
        in its path, and that the injected ``mock_hash`` was called exactly once.
        """
        # NOTE(review): the patch target for mock_hash is set by a decorator
        # that is not visible in this block.
        mock_hash.return_value = "1234"

        with classifier.log(str(tmp_path)):
            saved_path = classifier.save_estimator(FileStorage(tmp_path))

        assert saved_path.exists()

        yaml_paths = [str(found) for found in tmp_path.rglob("*.yaml")]
        assert "LogisticRegression" in yaml_paths[0]

        mock_hash.assert_called_once()
Пример #6
0
    def test_log_context_manager_logs_when_scoring_model(
            self, tmp_path: pathlib.Path, train_iris_dataset):
        """Score a model inside a ``log`` context and compare result and log.

        Every ``LinearRegression_*`` file found under the run directory is
        parsed as YAML; its ``metrics.r2`` and ``estimator_name`` entries must
        match the values on the result returned by ``score_estimator``.
        """
        model = Model(LinearRegression())

        runs = tmp_path / "runs"
        with model.log(str(runs)):
            result = model.score_estimator(train_iris_dataset)

        log_files = list(runs.rglob("LinearRegression_*"))
        # Guard against a vacuous pass: if nothing was logged, the loop below
        # would run zero times and the test would succeed without checking
        # anything.
        assert log_files, "no LinearRegression_* log file found in run directory"

        for log_file in log_files:
            with log_file.open() as f:
                log_result = yaml.safe_load(f)

            assert result.metrics.score == log_result["metrics"]["r2"]
            assert result.model.estimator_name == log_result["estimator_name"]
Пример #7
0
def train_model(year, month, day, graphs=True, clf=None):
    """Score an estimator on an AirBnB dataset, save it, and optionally plot.

    Args:
        year: Passed to ``AirBnBDataset`` as ``year``.
        month: Passed to ``AirBnBDataset`` as ``month``.
        day: Passed to ``AirBnBDataset`` as ``day``.
        graphs: When true, draw the feature-importance, residuals and
            prediction-error plots and save each under ``VISUALIZATIONS``.
        clf: Estimator to wrap in ``Model``. When ``None`` (the default) a
            fresh ``RandomForestRegressor()`` is created for this call.

    Returns:
        The result object returned by ``model.score_estimator(dataset)``.
    """
    # Build the default estimator per call: a default evaluated in the
    # signature would be a single instance shared by every call.
    if clf is None:
        clf = RandomForestRegressor()

    dataset = AirBnBDataset(year=year, month=month, day=day)
    dataset.create_train_test()

    model = Model(clf, feature_pipeline=features)
    result = model.score_estimator(dataset)
    # NOTE(review): N_JOBS is set after scoring, so it cannot influence the
    # score_estimator call above - confirm whether it was meant to come first.
    model.config.N_JOBS = 6
    with model.log("randomforest"):
        model.save_estimator()

    if graphs:
        result.plot.feature_importance()
        # NOTE(review): the feature-importance plot is saved under the name
        # "confusion_matrix.png"; kept as-is in case other code reads that
        # path - confirm the intended filename.
        plt.savefig(VISUALIZATIONS / "confusion_matrix.png")

        result.plot.residuals()
        plt.savefig(VISUALIZATIONS / "residuals.png")

        result.plot.prediction_error()
        plt.savefig(VISUALIZATIONS / "prediction_error.png")

    return result