def test_save_estimator_with_prod_flag_saves_correctly(
    self, classifier: Model
):
    """Check that ``prod=True`` is forwarded to the storage's ``save`` call.

    A ``MagicMock`` stands in for the storage, so nothing is written; the
    test only inspects the arguments ``save`` was invoked with.
    """
    storage_double = MagicMock()

    classifier.save_estimator(storage_double, prod=True)

    # Exactly one save, using the fixed production filename.
    storage_double.save.assert_called_once_with(
        classifier.estimator,
        "production_model.pkl",
        prod=True,
    )
def test_train_model_errors_correctly_when_not_scored(
    self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path, train_iris_dataset
):
    """Check that training and saving under ``log`` without scoring raises.

    The estimator is trained but never scored; with logging active the
    train/save sequence is expected to raise ``MLToolingError``.
    """
    model = Model(pipeline_logistic)
    expect_unscored_error = pytest.raises(
        MLToolingError, match="You haven't scored the estimator"
    )

    # The raises-context is outermost so it also observes errors raised
    # while the logging context is active or exiting.
    with expect_unscored_error, model.log(str(tmp_path)):
        model.train_estimator(train_iris_dataset)
        model.save_estimator(FileStorage(tmp_path))
def test_can_list_estimators(self, classifier: Model, tmp_path: pathlib.Path):
    """Check that saved estimators are listed and every listed path exists.

    Three saves go through one ``FileStorage``; the listing is then read
    through a second ``FileStorage`` pointed at the same directory.
    """
    storage = FileStorage(tmp_path)
    for _ in range(3):
        classifier.save_estimator(storage)

    storage_context = FileStorage(tmp_path)
    filenames_list = list(Model.list_estimators(storage_context))

    # Without this check an empty listing would skip the loop below and the
    # test would pass without verifying anything.
    assert filenames_list, "expected at least one saved estimator to be listed"

    for filename in filenames_list:
        assert filename.exists()
def test_can_save_with_model(self, classifier: Model, tmp_path: pathlib.Path):
    """Check that ``save_estimator`` returns an existing path for each storage.

    The save is done twice, each time through a separately constructed
    ``FileStorage`` over the same directory.
    """
    first_storage = FileStorage(tmp_path)
    first_saved = classifier.save_estimator(first_storage)
    assert first_saved.exists()

    second_storage = FileStorage(tmp_path)
    second_saved = classifier.save_estimator(second_storage)
    assert second_saved.exists()
def test_save_estimator_uses_default_storage_if_no_storage_is_passed(
    self, tmp_path: pathlib.Path, classifier: Model
):
    """Check that ``save_estimator()`` with no argument uses the default storage.

    After pointing ``config.ESTIMATOR_DIR`` at ``tmp_path``, a single save
    must appear in ``config.default_storage`` and reload with the same
    estimator parameters.
    """
    # NOTE(review): the config is mutated and not restored here; confirm
    # that a fixture resets it if the config object is shared across tests.
    classifier.config.ESTIMATOR_DIR = tmp_path

    classifier.save_estimator()

    stored = classifier.config.default_storage.get_list()
    assert len(stored) == 1

    reloaded = Model.load_estimator(stored[0])
    original_params = classifier.estimator.get_params()
    assert original_params == reloaded.estimator.get_params()
def test_save_model_saves_pipeline_correctly(
    self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path, train_iris_dataset
):
    """Check that a trained pipeline-backed model saves to an existing path."""
    pipeline_model = Model(pipeline_logistic)
    pipeline_model.train_estimator(train_iris_dataset)

    destination = FileStorage(tmp_path)
    written_path = pipeline_model.save_estimator(destination)

    assert written_path.exists()
def test_regression_model_filename_is_generated_correctly(
    self, classifier: Model, tmp_path: pathlib.Path, train_iris_dataset
):
    """Check that the saved file's stem is ``<estimator_name>_<timestamp>``.

    The timestamp part must parse as ``%Y_%m_%d_%H_%M_%S_%f``.
    """
    written_path = classifier.save_estimator(FileStorage(tmp_path))
    assert written_path.exists()

    # strptime raises ValueError when the stem does not match the pattern,
    # so a successful parse is the real check.
    stem_format = f"{classifier.estimator_name}_%Y_%m_%d_%H_%M_%S_%f"
    parsed = datetime.datetime.strptime(written_path.stem, stem_format)
    assert parsed
def test_can_load_with_model(self, classifier: Model, tmp_path: pathlib.Path):
    """Check that a saved estimator loads back as a ``Model``.

    The load is done twice: once with the storage used for saving and once
    with a freshly constructed ``FileStorage`` over the same directory.
    """
    save_storage = FileStorage(tmp_path)
    saved_path = classifier.save_estimator(save_storage)
    assert saved_path.exists()

    from_same_storage = classifier.load_estimator(saved_path, storage=save_storage)
    assert isinstance(from_same_storage, Model)

    fresh_storage = FileStorage(tmp_path)
    from_fresh_storage = classifier.load_estimator(
        saved_path, storage=fresh_storage
    )
    assert isinstance(from_fresh_storage, Model)
def train_model(year, month, day, graphs=True, clf=None):
    """Score and save a model on the AirBnB dataset for the given date.

    Args:
        year: Passed to ``AirBnBDataset`` as ``year``.
        month: Passed to ``AirBnBDataset`` as ``month``.
        day: Passed to ``AirBnBDataset`` as ``day``.
        graphs: When true, render the feature-importance, residual and
            prediction-error plots and save each under ``VISUALIZATIONS``.
        clf: Estimator to wrap in ``Model``. When ``None`` (the default), a
            new ``RandomForestRegressor()`` is created for this call.

    Returns:
        The result object returned by ``model.score_estimator``.
    """
    # Build the default per call. A default of ``RandomForestRegressor()``
    # in the signature is evaluated once at definition time, so every
    # defaulted call would share (and refit) the same estimator instance.
    if clf is None:
        clf = RandomForestRegressor()

    dataset = AirBnBDataset(year=year, month=month, day=day)
    dataset.create_train_test()

    model = Model(clf, feature_pipeline=features)
    result = model.score_estimator(dataset)

    # NOTE(review): N_JOBS is set after scoring, so it cannot influence the
    # score_estimator call above; confirm whether it should come first.
    model.config.N_JOBS = 6

    with model.log("randomforest"):
        model.save_estimator()

    if graphs:
        # NOTE(review): the feature-importance plot is written to
        # "confusion_matrix.png"; the name is kept so existing consumers of
        # that path keep working, but it looks like a copy-paste leftover.
        result.plot.feature_importance()
        plt.savefig(VISUALIZATIONS / "confusion_matrix.png")

        result.plot.residuals()
        plt.savefig(VISUALIZATIONS / "residuals.png")

        result.plot.prediction_error()
        plt.savefig(VISUALIZATIONS / "prediction_error.png")

    return result
def test_save_estimator_saves_logging_dir_correctly(
    self, mock_hash: MagicMock, classifier: Model, tmp_path: pathlib.Path
):
    """Check that saving under ``log`` writes the estimator and a YAML log.

    The first ``*.yaml`` file found under ``tmp_path`` must have
    "LogisticRegression" in its path, and the mocked hash function must be
    called exactly once.
    """
    mock_hash.return_value = "1234"

    with classifier.log(str(tmp_path)):
        saved_path = classifier.save_estimator(FileStorage(tmp_path))

    assert saved_path.exists()

    yaml_logs = [str(log_path) for log_path in tmp_path.rglob("*.yaml")]
    assert "LogisticRegression" in yaml_logs[0]

    mock_hash.assert_called_once()
def test_regression_model_can_be_saved(
    self, classifier: Model, tmp_path: pathlib.Path, train_iris_dataset
):
    """Check that a scored model round-trips through save and load.

    Saving and loading use two separately constructed ``FileStorage``
    objects over the same directory; the reloaded estimator must report the
    same parameters as the original.
    """
    classifier.score_estimator(train_iris_dataset)

    load_storage = FileStorage(tmp_path)
    save_storage = FileStorage(tmp_path)

    written_path = classifier.save_estimator(save_storage)
    assert written_path.exists()

    reloaded = classifier.load_estimator(written_path, storage=load_storage)
    reloaded_params = reloaded.estimator.get_params()
    assert reloaded_params == classifier.estimator.get_params()