def test_get_git_hash_returns_empty_if_git_not_found(mock_subprocess):
    """Check get_git_hash returns "" and warns when check_output raises OSError.

    ``mock_subprocess`` is a fixture defined elsewhere; it is presumably a
    patched ``subprocess`` module as seen by ``get_git_hash`` - confirm
    against the fixture definition.
    """
    import re

    # pytest.warns treats `match` as a regular expression (re.search), so the
    # trailing "?" would otherwise act as a quantifier instead of a literal
    # question mark. Escape the message to require the exact text.
    expected_warning = re.escape("Error using git - is `git` installed?")

    mock_subprocess.check_output.side_effect = OSError

    with pytest.warns(UserWarning, match=expected_warning):
        git_hash = get_git_hash()

    assert git_hash == ""
    mock_subprocess.check_output.assert_called_with(["git", "rev-parse", "HEAD"])
def test_get_git_hash_returns_empty_and_emits_warning_if_git_not_found(
    mock_subprocess,
):
    """Check get_git_hash returns "" and warns when git exits with an error.

    The simulated failure is a ``subprocess.CalledProcessError`` with return
    code 128 raised from ``check_output``. ``mock_subprocess`` is a fixture
    defined elsewhere; it is presumably a patched ``subprocess`` module as
    seen by ``get_git_hash`` - confirm against the fixture definition.
    """
    import re

    git_command = ["git", "rev-parse", "HEAD"]

    # pytest.warns treats `match` as a regular expression (re.search); without
    # escaping, "." matches any character and "?" makes the preceding backtick
    # optional, so the check would be looser than the literal message.
    expected_warning = re.escape(
        "Error using git - skipping git hash. Did you call `git init`?"
    )

    mock_subprocess.check_output.side_effect = subprocess.CalledProcessError(
        128, cmd=git_command
    )

    with pytest.warns(UserWarning, match=expected_warning):
        git_hash = get_git_hash()

    assert git_hash == ""
    # Same invocation check as the OSError test for get_git_hash.
    mock_subprocess.check_output.assert_called_with(git_command)
def dump(self) -> dict:
    """
    Build a dictionary describing this log entry.

    Returns
    -------
    dict
        Dictionary with the following keys, in this order:

        * model_name - value of ``self.name``
        * created_time - ``datetime.now()`` at the time of the call
        * versions
            - ml_tooling -> version
            - sklearn -> version
            - pandas -> version
        * git_hash - result of ``get_git_hash()``
        * metrics - result of ``self.metrics.to_dict()``
        * estimator - value of ``self.estimator``
        * estimator_path - ``str(self.estimator_path)``, or None when
          ``self.estimator_path`` is falsy
    """
    # Imported at call time, in the same order as before.
    from ml_tooling import __version__ as ml_tooling_release
    from sklearn import __version__ as sklearn_release
    from pandas import __version__ as pandas_release

    # Entries are added one by one so that key order and the order in which
    # the values are evaluated stay fixed.
    record = {"model_name": self.name, "created_time": datetime.now()}
    record["versions"] = {
        "ml_tooling": ml_tooling_release,
        "sklearn": sklearn_release,
        "pandas": pandas_release,
    }
    record["git_hash"] = get_git_hash()
    record["metrics"] = self.metrics.to_dict()
    record["estimator"] = self.estimator

    if self.estimator_path:
        record["estimator_path"] = str(self.estimator_path)
    else:
        record["estimator_path"] = None

    return record
def test_get_git_hash_returns_correctly():
    """Check get_git_hash returns a string longer than 10 characters.

    No mocking is used here, so the real ``get_git_hash`` is called.
    """
    result = get_git_hash()

    assert isinstance(result, str)
    assert len(result) > 10