def test_get_experiment_id_from_env():
    """Check how ``_get_experiment_id_from_env`` resolves the experiment ID.

    Four situations are exercised in order: nothing configured, only the
    experiment ID configured, only the experiment name configured, and both
    configured (in which case the name is expected to take precedence).
    """
    # ``random.randint`` requires integer bounds: a float bound such as ``1e6``
    # is deprecated since Python 3.10 and raises ``TypeError`` from Python 3.12.
    upper_bound = 10 ** 6

    # When no env variables are set
    HelperEnv.assert_values(None, None)
    assert _get_experiment_id_from_env() is None

    # set only ID
    random_id = random.randint(1, upper_bound)
    HelperEnv.set_values(experiment_id=random_id)
    HelperEnv.assert_values(str(random_id), None)
    assert _get_experiment_id_from_env() == str(random_id)

    # set only name
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, upper_bound)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        HelperEnv.set_values(name=name)
        HelperEnv.assert_values(None, name)
        assert _get_experiment_id_from_env() == exp_id

    # set both: assert that name variable takes precedence
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, upper_bound)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        random_id = random.randint(1, upper_bound)
        HelperEnv.set_values(name=name, experiment_id=random_id)
        HelperEnv.assert_values(str(random_id), name)
        assert _get_experiment_id_from_env() == exp_id
def test_log_artifacts():
    """Log a small directory tree through ``SFTPArtifactRepository.log_artifacts``.

    For each artifact path (none, one level, several levels) a text file and a
    nested binary file are logged from a local temp dir; the directory behind
    the ``sftp://`` URI is then checked for the expected layout and contents.
    """
    text_name = "meta.yaml"
    nested_dir = "saved_model"
    binary_name = "sk_model.pickle"

    for artifact_path in (None, "sub_dir", "very/nested/sub/dir"):
        text_payload = 'A simple test artifact\nThe artifact is located in: ' + str(
            artifact_path)
        binary_payload = os.urandom(300)

        with TempDir() as local, TempDir() as remote:
            local_root = local.path()
            remote_root = remote.path()

            # Build the local tree: <root>/meta.yaml and <root>/saved_model/sk_model.pickle
            with open(os.path.join(local_root, text_name), "w") as handle:
                handle.write(text_payload)
            os.mkdir(os.path.join(local_root, nested_dir))
            with open(os.path.join(local_root, nested_dir, binary_name), "wb") as handle:
                handle.write(binary_payload)

            repo = SFTPArtifactRepository("sftp://" + remote_root)
            repo.log_artifacts(local_root, artifact_path)

            # Without an artifact path the files are expected directly under the remote root.
            if artifact_path is None:
                remote_dir = posixpath.join(remote_root, '.')
            else:
                remote_dir = posixpath.join(remote_root, artifact_path)
            remote_text = posixpath.join(remote_dir, text_name)
            remote_binary = posixpath.join(remote_dir, nested_dir, binary_name)

            assert posixpath.isdir(remote_dir)
            assert posixpath.isdir(posixpath.join(remote_dir, nested_dir))
            assert posixpath.isfile(remote_text)
            assert posixpath.isfile(remote_binary)

            with open(remote_text, 'r') as remote_content:
                assert remote_content.read() == text_payload
            with open(remote_binary, 'rb') as remote_content:
                assert remote_content.read() == binary_payload
def test_serving_model_with_schema(pandas_df_with_all_types):
    """Serve a pyfunc model logged with a signature and check the dtypes it sees.

    The model echoes ``[column, dtype]`` pairs of its input. The frame is sent
    once in split orientation (after ``_shuffle_pdf``) and once in records
    orientation; both responses must equal the dtypes of the original frame.
    """

    class TestModel(PythonModel):
        def predict(self, context, model_input):
            return [[k, str(v)] for k, v in model_input.dtypes.items()]

    expected_dtypes = [
        [col, str(dtype)] for col, dtype in pandas_df_with_all_types.dtypes.items()
    ]
    input_schema = Schema([ColSpec(col, col) for col in pandas_df_with_all_types.columns])
    shuffled = _shuffle_pdf(pandas_df_with_all_types)

    with TempDir(chdr=True):
        with kiwi.start_run() as run:
            kiwi.pyfunc.log_model("model",
                                  python_model=TestModel(),
                                  signature=ModelSignature(input_schema))
        model_uri = "runs:/{}/model".format(run.info.run_id)

        # (payload, content type) pairs, scored in the same order as before.
        requests_to_send = (
            (shuffled.to_dict(orient="split"),
             pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED),
            (pandas_df_with_all_types.to_dict(orient="records"),
             pyfunc_scoring_server.CONTENT_TYPE_JSON_RECORDS_ORIENTED),
        )
        for payload, content_type in requests_to_send:
            response = pyfunc_serve_and_score_model(
                model_uri=model_uri,
                data=json.dumps(payload, cls=NumpyEncoder),
                content_type=content_type,
                extra_args=["--no-conda"])
            assert json.loads(response.content) == expected_dtypes
def test_prepare_env_passes(sk_model):
    """Check that ``mlflow models prepare-env`` exits with status 0.

    The command is run three times against a freshly logged sklearn model:
    with ``--no-conda``, with conda, and with conda again to confirm that a
    repeated invocation also succeeds.
    """
    if no_conda:
        pytest.skip("This test requires conda.")

    def run_prepare_env(model_uri, extra_args=()):
        """Run ``prepare-env`` for ``model_uri``; return ``(exit_code, stderr_text)``."""
        proc = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri] + list(extra_args),
            stderr=subprocess.PIPE)
        # ``communicate`` drains the stderr pipe while waiting. A bare ``wait()`` with
        # ``stderr=PIPE`` can deadlock once the child has written enough to fill the
        # OS pipe buffer.
        _, stderr_data = proc.communicate()
        return proc.returncode, stderr_data.decode("utf-8", errors="replace")

    with TempDir(chdr=True):
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model, "model")
            model_uri = "runs:/{run_id}/model".format(
                run_id=active_run.info.run_id)

        # Test with no conda
        exit_code, stderr_text = run_prepare_env(model_uri, ["--no-conda"])
        assert exit_code == 0, stderr_text

        # With conda
        exit_code, stderr_text = run_prepare_env(model_uri)
        assert exit_code == 0, stderr_text

        # Should be idempotent
        exit_code, stderr_text = run_prepare_env(model_uri)
        assert exit_code == 0, stderr_text
def test_signature_and_examples_are_saved_correctly(sklearn_knn_model, iris_data):
    """Save a loader-module pyfunc model with each signature/example combination.

    For every pairing of (no signature, inferred signature) with (no example,
    first three rows) the model is saved and its metadata reloaded to confirm
    that the signature and the input example were stored as given.
    """
    inferred_signature = infer_signature(*iris_data)
    sample_rows = iris_data[0][:3, ]

    for signature in (None, inferred_signature):
        for example in (None, sample_rows):
            with TempDir() as tmp:
                pickled_model_path = tmp.path("skmodel")
                with open(pickled_model_path, "wb") as out:
                    pickle.dump(sklearn_knn_model, out)

                model_dir = tmp.path("model")
                kiwi.pyfunc.save_model(
                    path=model_dir,
                    data_path=pickled_model_path,
                    # This test module itself serves as the loader module.
                    loader_module=os.path.basename(__file__)[:-3],
                    code_path=[__file__],
                    signature=signature,
                    input_example=example)

                loaded_meta = Model.load(model_dir)
                assert signature == loaded_meta.signature
                if example is not None:
                    assert all((_read_example(loaded_meta, model_dir) == example).all())
                else:
                    assert loaded_meta.saved_input_example_info is None
def test_model_log(spacy_model_with_data, tracking_uri_mock):  # pylint: disable=unused-argument
    """Log a spaCy model, load it back by run URI and compare predictions.

    Runs twice: once relying on ``log_model`` to have an active run available
    and once with a run started explicitly beforehand.
    """
    spacy_model = spacy_model_with_data.model
    inference_data = spacy_model_with_data.inference_data
    original_uri = kiwi.get_tracking_uri()
    artifact_path = "model"

    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                if should_start_run:
                    kiwi.start_run()
                kiwi.spacy.log_model(spacy_model=spacy_model, artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                reloaded_model = kiwi.spacy.load_model(model_uri=model_uri)
                assert all(
                    _predict(spacy_model, inference_data)
                    == _predict(reloaded_model, inference_data))
            finally:
                # Always close the run and put the tracking URI back, even on failure.
                kiwi.end_run()
                kiwi.set_tracking_uri(original_uri)
def test_log_artifacts(hdfs_system_mock):
    """Log a directory through ``HdfsArtifactRepository`` against a mocked HDFS client.

    Verifies the arguments used to create the HDFS client (including the
    Kerberos values taken from the environment) and that both files are opened
    for writing at the expected remote paths with the expected byte contents.
    """
    # NOTE(review): these variables stay set after the test finishes -- confirm
    # whether sibling tests depend on that before adding cleanup.
    os.environ['MLFLOW_KERBEROS_TICKET_CACHE'] = '/tmp/krb5cc_22222222'
    os.environ['MLFLOW_KERBEROS_USER'] = '******'

    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

    with TempDir() as root_dir:
        with open(root_dir.path("file_one.txt"), "w") as first_file:
            first_file.write('PyArrow Works once')

        os.mkdir(root_dir.path("subdir"))
        with open(root_dir.path("subdir/file_two.txt"), "w") as second_file:
            second_file.write('PyArrow Works two')

        repo.log_artifacts(root_dir._path)

        hdfs_system_mock.assert_called_once_with(
            extra_conf=None,
            host='default',
            kerb_ticket='/tmp/krb5cc_22222222',
            port=0,
            user='******')

        expected_opens = [
            call('/some_path/maybe/path/file_one.txt', 'wb'),
            call('/some_path/maybe/path/subdir/file_two.txt', 'wb'),
        ]
        open_mock = hdfs_system_mock.return_value.open
        open_mock.assert_has_calls(calls=expected_opens, any_order=True)

        expected_writes = [
            call(b'PyArrow Works once'),
            call(b'PyArrow Works two'),
        ]
        write_mock = open_mock.return_value.__enter__.return_value.write
        write_mock.assert_has_calls(calls=expected_writes, any_order=True)
def docker_example_base_image():
    """Build the ``mlflow-docker-example`` Docker image from the local dev checkout.

    The checkout named by ``MLFLOW_HOME`` is copied into a temp build context,
    the test project's Dockerfile is extended to install that copy, and the
    image is built. Build and API errors are printed and then re-raised.

    :raises Exception: if ``MLFLOW_HOME`` is unset or empty.
    """
    import shutil

    mlflow_home = os.environ.get("MLFLOW_HOME", None)
    if not mlflow_home:
        raise Exception(
            "MLFLOW_HOME environment variable is not set. Please set the variable to "
            "point to your mlflow dev root.")

    with TempDir() as tmp:
        build_context = tmp.path()
        dockerfile_path = tmp.path("Dockerfile")
        mlflow_dir = _copy_project(src_path=mlflow_home, dst_path=build_context)

        shutil.copy(os.path.join(TEST_DOCKER_PROJECT_DIR, "Dockerfile"), dockerfile_path)
        install_steps = ("COPY {mlflow_dir} /opt/mlflow\n"
                         "RUN pip install -U -e /opt/mlflow\n").format(mlflow_dir=mlflow_dir)
        with open(dockerfile_path, "a") as dockerfile:
            dockerfile.write(install_steps)

        client = docker.from_env()
        try:
            client.images.build(tag='mlflow-docker-example',
                                forcerm=True,
                                nocache=True,
                                dockerfile='Dockerfile',
                                path=build_context)
        except BuildError as build_error:
            # Surface the full build log before propagating the failure.
            for chunk in build_error.build_log:
                print(chunk)
            raise build_error
        except APIError as api_error:
            print(api_error.explanation)
            raise api_error
def test_serve_gunicorn_opts(iris_data, sk_model):
    """Serve a model with ``-w 3`` and check predictions and the gunicorn command.

    The model is addressed both through the model registry URI and the run
    URI; for each, the server's stdout is captured to a file and searched for
    the expected gunicorn invocation.
    """
    if sys.platform == "win32":
        pytest.skip(
            "This test requires gunicorn which is not available on windows.")

    with kiwi.start_run() as active_run:
        kiwi.sklearn.log_model(sk_model, "model", registered_model_name="imlegit")
        run_id = active_run.info.run_id

    features, _ = iris_data
    candidate_uris = (
        "models:/{name}/{stage}".format(name="imlegit", stage="None"),
        "runs:/{run_id}/model".format(run_id=run_id),
    )

    for model_uri in candidate_uris:
        with TempDir() as tmp:
            captured_stdout_path = tmp.path("stoudt")
            with open(captured_stdout_path, "w") as sink:
                scoring_response = pyfunc_serve_and_score_model(
                    model_uri,
                    pd.DataFrame(features),
                    content_type=CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                    stdout=sink,
                    extra_args=["-w", "3"])
            with open(captured_stdout_path, "r") as source:
                server_output = source.read()

        # Predictions come back as the first column of a records-oriented frame.
        predictions = pd.read_json(scoring_response.content, orient="records")
        predictions = predictions[predictions.columns[0]].values
        assert all(sk_model.predict(features) == predictions)

        assert re.search("gunicorn.*-w 3.*mlflow.pyfunc.scoring_server.wsgi:app",
                         server_output) is not None
def test_signature_and_examples_are_saved_correctly(iris_data, main_scoped_model_class):
    """Save a ``python_model`` pyfunc model with each signature/example combination.

    For every pairing of (no signature, inferred signature) with (no example,
    first three rows) the model is saved and its metadata reloaded to confirm
    that the signature and the input example were stored as given.
    """

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    inferred_signature = infer_signature(*iris_data)
    sample_rows = iris_data[0][:3, ]

    for signature in (None, inferred_signature):
        for example in (None, sample_rows):
            with TempDir() as tmp:
                model_dir = tmp.path("model")
                kiwi.pyfunc.save_model(
                    path=model_dir,
                    artifacts={},
                    python_model=main_scoped_model_class(test_predict),
                    signature=signature,
                    input_example=example)

                loaded_meta = Model.load(model_dir)
                assert signature == loaded_meta.signature
                if example is not None:
                    assert all((_read_example(loaded_meta, model_dir) == example).all())
                else:
                    assert loaded_meta.saved_input_example_info is None
def _upload_s3(local_model_path, bucket, prefix, region_name, s3_client):
    """
    Upload dir to S3 as .tar.gz.

    The directory is archived into a temporary ``model.tar.gz``, uploaded to
    ``<prefix>/model.tar.gz`` in the bucket, and the uploaded object is tagged
    with ``SageMaker=true``.

    :param local_model_path: Local path to a dir.
    :param bucket: S3 bucket where to store the data.
    :param prefix: Path within the bucket.
    :param region_name: The AWS region in which to upload data to S3.
    :param s3_client: A boto3 client for S3. Used for tagging the object and for
                      the endpoint URL in the returned path.
    :return: S3 path of the uploaded artifact.
    """
    import posixpath

    import boto3

    sess = boto3.Session(region_name=region_name)
    with TempDir() as tmp:
        model_data_file = tmp.path("model.tar.gz")
        _make_tarfile(model_data_file, local_model_path)
        with open(model_data_file, 'rb') as fobj:
            # S3 object keys use "/" as their separator on every platform, so join
            # with posixpath; os.path.join would insert "\" on Windows.
            key = posixpath.join(prefix, 'model.tar.gz')
            obj = sess.resource('s3').Bucket(bucket).Object(key)
            obj.upload_fileobj(fobj)
            response = s3_client.put_object_tagging(
                Bucket=bucket,
                Key=key,
                Tagging={'TagSet': [
                    {
                        'Key': 'SageMaker',
                        'Value': 'true'
                    },
                ]})
            _logger.info('tag response: %s', response)
            return '{}/{}/{}'.format(s3_client.meta.endpoint_url, bucket, key)
def test_cli_build_image_with_relative_model_path_calls_expected_azure_routines(
        sklearn_model):
    """Invoke the AzureML ``build-image`` CLI command with a relative model path.

    With the Azure SDK mocked out, the command must exit successfully and call
    model registration, image creation and workspace loading exactly once each.
    """
    cli_env = {"LC_ALL": "en_US.UTF-8", "LANG": "en_US.UTF-8"}

    with TempDir(chdr=True):
        model_path = "model"
        kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)

        cli_args = [
            'build-image',
            '-m', model_path,
            '-w', 'test_workspace',
            '-i', 'image_name',
            '-n', 'model_name',
        ]
        with AzureMLMocks() as aml_mocks:
            runner = CliRunner(env=cli_env)
            result = runner.invoke(kiwi.azureml.cli.commands, cli_args)
            assert result.exit_code == 0

            for mocked_routine in ("register_model", "create_image", "load_workspace"):
                assert aml_mocks[mocked_routine].call_count == 1
def test_build_image_includes_mlflow_home_as_file_dependency_if_specified(
        sklearn_model, model_path):
    """Check that ``build_image`` ships the given ``mlflow_home`` as an image dependency.

    Dockerfile creation is patched to write a placeholder file; the MLflow path
    passed to it must show up among the dependencies of the image config.
    """

    def mock_create_dockerfile(output_path, *args, **kwargs):
        # pylint: disable=unused-argument
        with open(output_path, "w") as f:
            f.write("Dockerfile contents")

    kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)

    with AzureMLMocks() as aml_mocks:
        with TempDir() as tmp:
            with mock.patch("mlflow.azureml._create_dockerfile") as create_dockerfile_mock:
                create_dockerfile_mock.side_effect = mock_create_dockerfile

                # Write a mock `setup.py` file to the mlflow home path so that it will be
                # recognized as a viable MLflow source directory during the image build
                # process
                mlflow_home = tmp.path()
                with open(os.path.join(mlflow_home, "setup.py"), "w") as setup_file:
                    setup_file.write("setup instructions")

                workspace = get_azure_workspace()
                kiwi.azureml.build_image(model_uri=model_path,
                                         workspace=workspace,
                                         mlflow_home=mlflow_home)

                dockerfile_calls = create_dockerfile_mock.call_args_list
                assert len(dockerfile_calls) == 1
                _, create_dockerfile_kwargs = dockerfile_calls[0]
                # The path to MLflow that is referenced by the Docker container may differ
                # from the user-specified `mlflow_home` path if the directory is copied
                # before image building for safety
                dockerfile_mlflow_path = create_dockerfile_kwargs["mlflow_path"]

                create_image_calls = aml_mocks["create_image"].call_args_list
                assert len(create_image_calls) == 1
                _, create_image_kwargs = create_image_calls[0]
                image_config = create_image_kwargs["image_config"]
                assert dockerfile_mlflow_path in image_config.dependencies
def test_model_log(h2o_iris_model):
    """Log an H2O model, load it back by run URI and compare predictions.

    Runs twice: once relying on ``log_model`` to have an active run available
    and once with a run started explicitly beforehand.
    """
    h2o_model = h2o_iris_model.model
    original_uri = kiwi.get_tracking_uri()
    artifact_path = "gbm_model"

    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model, artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                reloaded_model = kiwi.h2o.load_model(model_uri=model_uri)
                reloaded_frame = reloaded_model.predict(
                    h2o_iris_model.inference_data).as_data_frame()
                original_frame = h2o_model.predict(
                    h2o_iris_model.inference_data).as_data_frame()
                assert all(reloaded_frame == original_frame)
            finally:
                # Always close the run and put the tracking URI back, even on failure.
                kiwi.end_run()
                kiwi.set_tracking_uri(original_uri)
def test_get_experiment_by_name():
    """Create an experiment and check that looking it up by name returns the same ID."""
    with TempDir(chdr=True):
        # ``random.randint`` requires integer bounds: a float bound such as ``1e6``
        # is deprecated since Python 3.10 and raises ``TypeError`` from Python 3.12.
        name = "Random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)

        experiment = kiwi.get_experiment_by_name(name)
        assert experiment.experiment_id == exp_id
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """Run ``mlflow models predict`` on a model whose conda env pins an older mlflow.

    Currently always skipped (see the TODOs below); the remainder documents the
    intended check: predictions written by the CLI for records-oriented JSON
    input must match the in-process test model's predictions.
    """
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")

    features, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(features).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        test_model_conda_path = tmp.path("conda.yml")

        # create env with old mlflow!
        pinned_mlflow = "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
        _mlflow_conda_env(path=test_model_conda_path,
                          additional_pip_deps=[pinned_mlflow])
        pyfunc.save_model(path=test_model_path,
                          loader_module=test_pyfunc.__name__.split(".")[-1],
                          code_path=[test_pyfunc.__file__],
                          conda_env=test_model_conda_path)

        # explicit json format with orient records
        command = [
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(test_model_path),
            "-i", input_records_path,
            "-o", output_json_path,
            "-t", "json",
            "--json-format", "records",
        ] + no_conda
        process = subprocess.Popen(command)
        assert 0 == process.wait()

        predictions = pd.read_json(output_json_path, orient="records")
        predictions = predictions[predictions.columns[0]].values
        reference = test_pyfunc.PyFuncTestModel(check_version=False).predict(
            df=pd.DataFrame(features))
        assert all(reference == predictions)
def test_path_params():
    """Check ``EntryPoint.compute_parameters`` for ``uri`` and ``path`` parameters.

    Four scenarios, each with the artifact download function patched: defaults
    only and user overrides without a storage dir (no download expected), then
    a default and a user-supplied ``path`` value with a storage dir (exactly
    one download expected, with the parameter replaced by the download path).
    """
    download_target = "mlflow.tracking.artifact_utils._download_artifact_from_uri"
    data_file = "s3://path.test/resources/data_file.csv"
    entry_point = EntryPoint(
        "entry_point_name",
        {
            "constants": {"type": "uri", "default": "s3://path.test/b1"},
            "data": {"type": "path", "default": data_file}
        },
        "command_name script.py")

    # Defaults only, no storage dir: nothing is downloaded.
    with mock.patch(download_target) as download_uri_mock:
        final_1, extra_1 = entry_point.compute_parameters({}, None)
        assert final_1 == {"constants": "s3://path.test/b1", "data": data_file}
        assert extra_1 == {}
        assert download_uri_mock.call_count == 0

    # User overrides a uri parameter and adds an undeclared one.
    with mock.patch(download_target) as download_uri_mock:
        user_2 = {"alpha": 0.001, "constants": "s3://path.test/b_two"}
        final_2, extra_2 = entry_point.compute_parameters(user_2, None)
        assert final_2 == {"constants": "s3://path.test/b_two", "data": data_file}
        assert extra_2 == {"alpha": "0.001"}
        assert download_uri_mock.call_count == 0

    # Storage dir given: the default path parameter is downloaded.
    with mock.patch(download_target) as download_uri_mock:
        with TempDir() as tmp:
            dest_path = tmp.path()
            download_path = "%s/data_file.csv" % dest_path
            download_uri_mock.return_value = download_path
            user_3 = {"alpha": 0.001}
            final_3, extra_3 = entry_point.compute_parameters(user_3, dest_path)
            assert final_3 == {"constants": "s3://path.test/b1", "data": download_path}
            assert extra_3 == {"alpha": "0.001"}
            assert download_uri_mock.call_count == 1

    # Storage dir given: a user-supplied path parameter is downloaded.
    with mock.patch(download_target) as download_uri_mock:
        with TempDir() as tmp:
            dest_path = tmp.path()
            download_path = "%s/images.tgz" % dest_path
            download_uri_mock.return_value = download_path
            user_4 = {"data": "s3://another.example.test/data_stash/images.tgz"}
            final_4, extra_4 = entry_point.compute_parameters(user_4, dest_path)
            assert final_4 == {"constants": "s3://path.test/b1", "data": download_path}
            assert extra_4 == {}
            assert download_uri_mock.call_count == 1
def test_dir_create():
    """Copy a file with ``_copy_file_or_tree`` into a directory the test never creates.

    The path returned by ``_copy_file_or_tree`` must hold the same contents as
    the source file.
    """
    with TempDir() as tmp:
        file_path = tmp.path("test_file.txt")
        create_dir = tmp.path("test_dir2/")
        with open(file_path, 'a') as f:
            f.write("testing")
        name = _copy_file_or_tree(file_path, file_path, create_dir)
        # shallow=False forces a byte-by-byte comparison; the default shallow mode
        # treats files with equal os.stat() signatures as equal without reading them.
        assert filecmp.cmp(file_path, name, shallow=False)
def test_hidden_files_are_logged_correctly(local_artifact_repo):
    """Log a dot-file as an artifact and check it can be downloaded with its contents."""
    with TempDir() as local_dir:
        hidden_file = local_dir.path(".mystery")
        with open(hidden_file, "w") as f:
            f.write("42")
        local_artifact_repo.log_artifact(hidden_file)
        # Read through a context manager so the downloaded file's handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(local_artifact_repo.download_artifacts(".mystery")) as downloaded:
            assert downloaded.read() == "42"
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of type `path`.
    """
    entry_point = load_project().get_entry_point("line_count")

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        download_uri_mock.return_value = 0

        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")
            # The same local file is passed first as a plain path, then as a file URI.
            for path_value in (local_path, path_to_local_file_uri(local_path)):
                params, _ = entry_point.compute_parameters(
                    user_parameters={"path": path_value},
                    storage_dir=dst_dir)
                assert params["path"] == os.path.abspath(local_path)
                assert download_uri_mock.call_count == 0

        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            with pytest.raises(ExecutionException):
                dst_dir = tmp.path()
                entry_point.compute_parameters(
                    user_parameters={"path": os.path.join(dst_dir, "some/nonexistent/file")},
                    storage_dir=dst_dir)

        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        file_to_download = 'images.tgz'
        for expected_calls, prefix in enumerate(["dbfs:/", "s3://", "gs://"], 1):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                params, _ = entry_point.compute_parameters(
                    user_parameters={"path": os.path.join(prefix, file_to_download)},
                    storage_dir=dst_dir)
                assert params["path"] == download_path
                # The mock is shared across iterations, so its call count accumulates.
                assert download_uri_mock.call_count == expected_calls
def test_dir_copy():
    """Copy a directory with ``_copy_file_or_tree`` and verify the file inside it.

    The previous assertion, ``assert filecmp.dircmp(...)``, could never fail
    because a ``dircmp`` object is always truthy. The copied file is now
    located under the destination and compared byte-by-byte with the source.
    """
    file_name = "test_file.txt"
    with TempDir() as tmp:
        dir_path = tmp.path("test_dir1/")
        copy_path = tmp.path("test_dir2")
        os.mkdir(dir_path)
        source_file = os.path.join(dir_path, file_name)
        with open(source_file, 'a') as f:
            f.write("testing")
        _copy_file_or_tree(dir_path, copy_path, "")

        # Search the whole destination tree instead of assuming the exact
        # sub-directory the helper copies into.
        copied_files = [
            os.path.join(root, file_name)
            for root, _, names in os.walk(copy_path)
            if file_name in names
        ]
        assert copied_files, "no copy of %s found under %s" % (file_name, copy_path)
        for copied_file in copied_files:
            assert filecmp.cmp(source_file, copied_file, shallow=False)
def test_file_copy():
    """Copy a single file into an existing directory with ``_copy_file_or_tree``.

    The file is expected at ``<copy_path>/test_file.txt`` with the same contents
    as the source.
    """
    with TempDir() as tmp:
        file_path = tmp.path("test_file.txt")
        copy_path = tmp.path("test_dir1/")
        os.mkdir(copy_path)
        with open(file_path, 'a') as f:
            f.write("testing")
        _copy_file_or_tree(file_path, copy_path, "")
        # shallow=False forces a byte-by-byte comparison; the default shallow mode
        # treats files with equal os.stat() signatures as equal without reading them.
        assert filecmp.cmp(file_path,
                           os.path.join(copy_path, "test_file.txt"),
                           shallow=False)
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id(
):
    """Check that ``_get_experiment_id`` returns the ID of the experiment made active."""
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        # ``random.randint`` requires integer bounds: a float bound such as ``1e6``
        # is deprecated since Python 3.10 and raises ``TypeError`` from Python 3.12.
        name = "Random experiment %d" % random.randint(1, 10 ** 6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        kiwi.set_experiment(name)
        assert _get_experiment_id() == exp_id
def test_log_artifact_throws_exception_for_invalid_artifact_paths(
        local_artifact_repo):
    """Check that ``log_artifact`` rejects absolute, current-dir and parent-dir paths.

    Each bad artifact path must raise ``MlflowException`` with a message that
    contains "Invalid artifact path".
    """
    bad_artifact_paths = [
        "/", "//", "/tmp", "/bad_path", ".", "../terrible_path"
    ]
    with TempDir() as local_dir:
        for bad_artifact_path in bad_artifact_paths:
            with pytest.raises(MlflowException) as exc_info:
                local_artifact_repo.log_artifact(local_dir.path(),
                                                 bad_artifact_path)
            # Check the raised exception's own message: str() of the ExceptionInfo
            # wrapper is a pytest-version-dependent representation of it.
            assert "Invalid artifact path" in str(exc_info.value)
def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
    """
    Save a model with the given flavor module and log it as an artifact of the current run.

    The run is obtained through ``_get_or_start_run``. The model is first saved to a
    temporary directory, then uploaded under ``artifact_path``; afterwards its metadata
    is recorded with the tracking server (a failure there only produces a warning) and,
    if requested, the model is registered.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered
                                  model if one with the given name does not exist.
    :param signature: (Experimental) :py:class:`ModelSignature` describes model input
                      and output :py:class:`mlflow.types.Schema`. The model signature
                      can be :py:func:`inferred` (``infer_signature``) from datasets
                      representing valid model input (e.g. the training dataset) and
                      valid model output (e.g. model predictions generated on the
                      training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        signature = infer_signature(train, model.predict(train))

    :param input_example: (Experimental) Input example provides one or several examples of
                          valid model input. The example can be used as a hint of what data
                          to feed the model. The given example will be converted to a
                          Pandas DataFrame and then serialized to json using the Pandas
                          split-oriented format. Bytes are base64-encoded.
    :param kwargs: Extra args passed to the model flavor (``signature`` and
                   ``input_example`` are passed through this way).
    """
    fluent = kiwi.tracking.fluent
    with TempDir() as tmp:
        staging_dir = tmp.path("model")
        current_run_id = fluent._get_or_start_run().info.run_id
        model_meta = cls(artifact_path=artifact_path, run_id=current_run_id)

        flavor.save_model(path=staging_dir, mlflow_model=model_meta, **kwargs)
        fluent.log_artifacts(staging_dir, artifact_path)

        try:
            fluent._record_logged_model(model_meta)
        except MlflowException:
            # We need to swallow all mlflow exceptions to maintain backwards compatibility
            # with older tracking servers. Only print out a warning for now.
            _logger.warning(
                "Logging model metadata to the tracking server has failed, possibly due older "
                "server version. The model artifacts have been logged successfully under %s. "
                "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                "attempt to record model metadata to the tracking store. If logging to a "
                "mlflow server via REST, consider upgrading the server version to MLflow "
                "1.7.0 or above.", kiwi.get_artifact_uri())

        if registered_model_name is not None:
            registering_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (registering_run_id, artifact_path)
            kiwi.register_model(model_uri, registered_model_name)
def test_log_model_no_registered_model_name(xgb_model):
    """Check that logging without ``registered_model_name`` does not register a model."""
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    tmp_dir = TempDir(chdr=True, remove_on_exit=True)

    with kiwi.start_run(), register_model_patch, tmp_dir as tmp:
        conda_env_path = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env_path, additional_pip_deps=["xgboost"])

        kiwi.xgboost.log_model(xgb_model=xgb_model.model,
                               artifact_path=artifact_path,
                               conda_env=conda_env_path)

        # Must be asserted while the patch is still active.
        kiwi.register_model.assert_not_called()
def test_deploy_with_relative_model_path_calls_expected_azure_routines(
        sklearn_model):
    """Deploy a model saved at a relative path with the Azure SDK mocked out.

    Model registration and model deployment must each be called exactly once.
    """
    with TempDir(chdr=True):
        model_path = "model"
        kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)

        with AzureMLMocks() as aml_mocks:
            kiwi.azureml.deploy(model_uri=model_path,
                                workspace=get_azure_workspace())

            for mocked_routine in ("register_model", "model_deploy"):
                assert aml_mocks[mocked_routine].call_count == 1
def test_download_artifacts():
    """Download a file through ``_download_hdfs_file`` from a mocked HDFS client.

    The mock's ``open`` yields fixed bytes; the local file written by the
    helper must contain exactly those bytes.
    """
    payload = b"hello"
    artifact_path = "test.txt"

    # mock hdfs
    hdfs = mock.Mock(open=mock_open(read_data=payload))

    with TempDir() as tmp_dir:
        local_file = os.path.join(tmp_dir.path(), artifact_path)
        _download_hdfs_file(hdfs, artifact_path, local_file)
        with open(os.path.join(tmp_dir.path(), artifact_path), "rb") as downloaded:
            assert payload == downloaded.read()
def test_log_model_calls_register_model(xgb_model):
    """Check that logging with ``registered_model_name`` registers the run's model URI."""
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    tmp_dir = TempDir(chdr=True, remove_on_exit=True)

    with kiwi.start_run(), register_model_patch, tmp_dir as tmp:
        conda_env_path = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env_path, additional_pip_deps=["xgboost"])

        kiwi.xgboost.log_model(xgb_model=xgb_model.model,
                               artifact_path=artifact_path,
                               conda_env=conda_env_path,
                               registered_model_name="AdsModel1")

        expected_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=artifact_path)
        # Must be asserted while the patch is still active.
        kiwi.register_model.assert_called_once_with(expected_uri, "AdsModel1")
def test_uri_parameter():
    """Tests parameter resolution for parameters of type `uri`."""
    entry_point = load_project().get_entry_point("download_uri")

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        with TempDir() as tmp:
            dst_dir = tmp.path()

            # Test that we don't attempt to locally download parameters of type URI
            file_uri = "file://%s" % dst_dir
            entry_point.compute_command(user_parameters={"uri": file_uri},
                                        storage_dir=dst_dir)
            assert download_uri_mock.call_count == 0

            # Test that we raise an exception if a local path is passed to a parameter
            # of type URI
            with pytest.raises(ExecutionException):
                entry_point.compute_command(user_parameters={"uri": dst_dir},
                                            storage_dir=dst_dir)