示例#1
0
    def test_can_score_estimator_with_no_y_value(self):
        """Train on a dataset without a target, then check splitting is refused.

        The dummy dataset returns ``None`` as its training target. Training
        must still fit the estimator (verified through the stored column
        means), while ``create_train_test`` must raise ``DatasetError``.
        """

        def make_features():
            # Build a fresh frame on every call so the training and
            # prediction loaders never share one DataFrame object.
            return pd.DataFrame({
                "col1": [1, 2, 3, 4],
                "col2": [4, 5, 6, 7]
            })

        # Estimator that only records the per-column mean of what it is
        # fitted on and returns that value from ``predict``.
        class DummyEstimator(BaseEstimator, RegressorMixin):
            def __init__(self):
                self.average = None

            def fit(self, x, y=None):
                self.average = np.mean(x, axis=0)
                return self

            def predict(self, x):
                return self.average

        # Dataset whose training data has features but no target.
        class DummyData(Dataset):
            def load_training_data(self):
                return make_features(), None

            def load_prediction_data(self, *args, **kwargs):
                return make_features()

        model = Model(DummyEstimator())
        data = DummyData()
        model.train_estimator(data)

        # Column means of col1 and col2 respectively.
        expected_means = np.array([2.5, 5.5])
        means_match = np.isclose(model.estimator.average, expected_means)
        assert np.all(means_match)

        no_target_error = pytest.raises(
            DatasetError, match="The dataset does not define a y value")
        with no_target_error:
            data.create_train_test()
示例#2
0
    def test_train_model_followed_by_score_model_returns_correctly(
            self, pipeline_logistic: Pipeline, train_iris_dataset):
        """Training and then scoring must leave a ``Result`` on the model."""
        scored_model = Model(pipeline_logistic)

        # Train first, then score against the same dataset fixture.
        scored_model.train_estimator(train_iris_dataset)
        scored_model.score_estimator(train_iris_dataset)

        outcome = scored_model.result
        assert isinstance(outcome, Result)
示例#3
0
 def test_make_prediction_errors_if_asked_for_proba_without_predict_proba_method(
         self, train_iris_dataset: Dataset):
     """Requesting probabilities from ``LinearRegression`` must raise.

     The model is built and trained outside the ``pytest.raises`` block so
     that only ``make_prediction(..., proba=True)`` can satisfy the
     expectation. An ``MLToolingError`` raised during setup now fails the
     test instead of being mistaken for the expected error.
     """
     model = Model(LinearRegression())
     model.train_estimator(train_iris_dataset)

     # Keep the raises-block to the single statement under test.
     with pytest.raises(
             MLToolingError,
             match="LinearRegression does not have a `predict_proba`"):
         model.make_prediction(train_iris_dataset, 5, proba=True)
示例#4
0
 def test_save_model_saves_pipeline_correctly(
         self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path,
         train_iris_dataset):
     """Saving a trained pipeline model must return a path that exists."""
     trained = Model(pipeline_logistic)
     trained.train_estimator(train_iris_dataset)

     # Persist into the per-test temporary directory.
     storage = FileStorage(tmp_path)
     written_path = trained.save_estimator(storage)

     assert written_path.exists()
示例#5
0
 def test_train_model_errors_correctly_when_not_scored(
         self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path,
         train_iris_dataset):
     """Train and save under ``model.log`` without scoring; expect an error.

     The whole logged train-and-save sequence runs inside the
     ``pytest.raises`` expectation, which requires an ``MLToolingError``
     whose message matches "You haven't scored the estimator".
     """
     model = Model(pipeline_logistic)
     log_dir = str(tmp_path)
     not_scored = pytest.raises(MLToolingError,
                                match="You haven't scored the estimator")

     # ``with a, b:`` is the same as nesting ``with b:`` inside ``with a:``,
     # so the logging context is still the inner one.
     with not_scored, model.log(log_dir):
         model.train_estimator(train_iris_dataset)
         model.save_estimator(FileStorage(tmp_path))
    def test_load_prediction_data_works_as_expected(self):
        """Predict on the iris demo dataset through a scaling feature pipeline.

        The prediction made with argument ``5`` must be a one-row frame with
        a single ``Prediction`` column holding ``0`` (dtype is not compared).
        """
        iris = load_demo_dataset("iris")
        iris.create_train_test(stratify=True)

        scaling = Pipeline([("scale", DFStandardScaler())])
        classifier = Model(LogisticRegression(), feature_pipeline=scaling)
        classifier.train_estimator(iris)

        predicted = classifier.make_prediction(iris, 5)

        pd.testing.assert_frame_equal(
            predicted, pd.DataFrame({"Prediction": [0]}), check_dtype=False)
示例#7
0
    def test_make_prediction_with_regression_sqldataset_works_as_expected(
            self, boston_sqldataset, loaded_boston_db):
        """A regression prediction from a SQL dataset is a 1x1 frame.

        The single column must be named ``Prediction``.
        """
        sql_data = boston_sqldataset(loaded_boston_db, schema=None)
        sql_data.create_train_test(stratify=False)

        regressor = Model(LinearRegression())
        regressor.train_estimator(sql_data)
        prediction = regressor.make_prediction(sql_data, 0)

        column_names = prediction.columns.tolist()
        assert prediction.shape == (1, 1)
        assert column_names == ["Prediction"]
示例#8
0
    def test_make_prediction_with_classification_sqldataset_works_as_expected(
            self, iris_sqldataset, loaded_iris_db):
        """A probability prediction from a SQL dataset has one column per class.

        With ``proba=True`` the result must be a 1x2 frame whose columns are
        the two ``Probability Class N`` labels.
        """
        sql_data = iris_sqldataset(loaded_iris_db, schema=None)
        sql_data.create_train_test()

        classifier = Model(LogisticRegression(solver="lbfgs"))
        classifier.train_estimator(sql_data)
        probabilities = classifier.make_prediction(sql_data, 0, proba=True)

        expected_columns = ["Probability Class 0", "Probability Class 1"]
        assert probabilities.shape == (1, 2)
        assert probabilities.columns.tolist() == expected_columns
示例#9
0
 def test_train_model_sets_result_to_none(self, regression: Model,
                                          train_iris_dataset):
     """Re-training a model that already has a result must clear it."""
     # Precondition: the fixture model arrives with a result attached.
     had_result = regression.result is not None
     assert had_result

     regression.train_estimator(train_iris_dataset)

     # Training must reset the stored result.
     assert regression.result is None