Example #1
def test_get_experiment_id_from_env():
    # When no env variables are set
    HelperEnv.assert_values(None, None)
    assert _get_experiment_id_from_env() is None

    # set only ID
    random_id = random.randint(1, 1e6)
    HelperEnv.set_values(experiment_id=random_id)
    HelperEnv.assert_values(str(random_id), None)
    assert _get_experiment_id_from_env() == str(random_id)

    # set only name
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        HelperEnv.set_values(name=name)
        HelperEnv.assert_values(None, name)
        assert _get_experiment_id_from_env() == exp_id

    # set both: assert that name variable takes precedence
    with TempDir(chdr=True):
        name = "random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        random_id = random.randint(1, 1e6)
        HelperEnv.set_values(name=name, experiment_id=random_id)
        HelperEnv.assert_values(str(random_id), name)
        assert _get_experiment_id_from_env() == exp_id
Example #2
def test_log_artifacts():
    for artifact_path in [None, "sub_dir", "very/nested/sub/dir"]:
        file_content_1 = 'A simple test artifact\nThe artifact is located in: ' + str(
            artifact_path)
        file_content_2 = os.urandom(300)

        file1 = "meta.yaml"
        directory = "saved_model"
        file2 = "sk_model.pickle"
        with TempDir() as local, TempDir() as remote:
            with open(os.path.join(local.path(), file1), "w") as f:
                f.write(file_content_1)
            os.mkdir(os.path.join(local.path(), directory))
            with open(os.path.join(local.path(), directory, file2), "wb") as f:
                f.write(file_content_2)

            sftp_path = "sftp://" + remote.path()
            store = SFTPArtifactRepository(sftp_path)
            store.log_artifacts(local.path(), artifact_path)

            remote_dir = posixpath.join(
                remote.path(), '.' if artifact_path is None else artifact_path)
            assert posixpath.isdir(remote_dir)
            assert posixpath.isdir(posixpath.join(remote_dir, directory))
            assert posixpath.isfile(posixpath.join(remote_dir, file1))
            assert posixpath.isfile(
                posixpath.join(remote_dir, directory, file2))

            with open(posixpath.join(remote_dir, file1),
                      'r') as remote_content:
                assert remote_content.read() == file_content_1

            with open(posixpath.join(remote_dir, directory, file2),
                      'rb') as remote_content:
                assert remote_content.read() == file_content_2
Example #3
def test_serving_model_with_schema(pandas_df_with_all_types):
    class TestModel(PythonModel):
        def predict(self, context, model_input):
            return [[k, str(v)] for k, v in model_input.dtypes.items()]

    schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns])
    df = _shuffle_pdf(pandas_df_with_all_types)
    with TempDir(chdr=True):
        with kiwi.start_run() as run:
            kiwi.pyfunc.log_model("model",
                                  python_model=TestModel(),
                                  signature=ModelSignature(schema))
        response = pyfunc_serve_and_score_model(
            model_uri="runs:/{}/model".format(run.info.run_id),
            data=json.dumps(df.to_dict(orient="split"), cls=NumpyEncoder),
            content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
            extra_args=["--no-conda"])
        response_json = json.loads(response.content)
        assert response_json == [
            [k, str(v)] for k, v in pandas_df_with_all_types.dtypes.items()
        ]
        response = pyfunc_serve_and_score_model(
            model_uri="runs:/{}/model".format(run.info.run_id),
            data=json.dumps(pandas_df_with_all_types.to_dict(orient="records"),
                            cls=NumpyEncoder),
            content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_RECORDS_ORIENTED,
            extra_args=["--no-conda"])
        response_json = json.loads(response.content)
        assert response_json == [
            [k, str(v)] for k, v in pandas_df_with_all_types.dtypes.items()
        ]
Example #4
def test_prepare_env_passes(sk_model):
    if no_conda:
        pytest.skip("This test requires conda.")

    with TempDir(chdr=True):
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model, "model")
            model_uri = "runs:/{run_id}/model".format(
                run_id=active_run.info.run_id)

        # Test with no conda
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri, "--no-conda"],
            stderr=subprocess.PIPE)
        assert p.wait() == 0

        # With conda
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri],
            stderr=subprocess.PIPE)
        assert p.wait() == 0

        # Should be idempotent
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri],
            stderr=subprocess.PIPE)
        assert p.wait() == 0
Example #5
def test_signature_and_examples_are_saved_correctly(sklearn_knn_model,
                                                    iris_data):
    data = iris_data
    signature_ = infer_signature(*data)
    example_ = data[0][:3, ]
    for signature in (None, signature_):
        for example in (None, example_):
            with TempDir() as tmp:
                with open(tmp.path("skmodel"), "wb") as f:
                    pickle.dump(sklearn_knn_model, f)
                path = tmp.path("model")
                kiwi.pyfunc.save_model(
                    path=path,
                    data_path=tmp.path("skmodel"),
                    loader_module=os.path.basename(__file__)[:-3],
                    code_path=[__file__],
                    signature=signature,
                    input_example=example)
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model,
                                              path) == example).all())
Example #6
def test_model_log(spacy_model_with_data, tracking_uri_mock):  # pylint: disable=unused-argument
    spacy_model = spacy_model_with_data.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "model"
                if should_start_run:
                    kiwi.start_run()
                kiwi.spacy.log_model(spacy_model=spacy_model,
                                     artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                spacy_model_loaded = kiwi.spacy.load_model(model_uri=model_uri)
                assert all(
                    _predict(spacy_model, spacy_model_with_data.inference_data)
                    == _predict(spacy_model_loaded,
                                spacy_model_with_data.inference_data))
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
Example #7
def test_log_artifacts(hdfs_system_mock):
    os.environ['MLFLOW_KERBEROS_TICKET_CACHE'] = '/tmp/krb5cc_22222222'
    os.environ['MLFLOW_KERBEROS_USER'] = '******'

    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

    with TempDir() as root_dir:
        with open(root_dir.path("file_one.txt"), "w") as f:
            f.write('PyArrow Works once')

        os.mkdir(root_dir.path("subdir"))
        with open(root_dir.path("subdir/file_two.txt"), "w") as f:
            f.write('PyArrow Works two')

        repo.log_artifacts(root_dir._path)

        hdfs_system_mock.assert_called_once_with(extra_conf=None,
                                                 host='default',
                                                 kerb_ticket='/tmp/krb5cc_22222222', port=0,
                                                 user='******')

        open_mock = hdfs_system_mock.return_value.open
        open_mock.assert_has_calls(calls=[call('/some_path/maybe/path/file_one.txt', 'wb'),
                                          call('/some_path/maybe/path/subdir/file_two.txt', 'wb')],
                                   any_order=True)
        write_mock = open_mock.return_value.__enter__.return_value.write
        write_mock.assert_has_calls(calls=[call(b'PyArrow Works once'),
                                           call(b'PyArrow Works two')],
                                    any_order=True)
Example #8
def docker_example_base_image():
    mlflow_home = os.environ.get("MLFLOW_HOME", None)
    if not mlflow_home:
        raise Exception(
            "MLFLOW_HOME environment variable is not set. Please set the variable to "
            "point to your mlflow dev root.")
    with TempDir() as tmp:
        cwd = tmp.path()
        mlflow_dir = _copy_project(src_path=mlflow_home, dst_path=cwd)
        import shutil
        shutil.copy(os.path.join(TEST_DOCKER_PROJECT_DIR, "Dockerfile"),
                    tmp.path("Dockerfile"))
        with open(tmp.path("Dockerfile"), "a") as f:
            f.write(("COPY {mlflow_dir} /opt/mlflow\n"
                     "RUN pip install -U -e /opt/mlflow\n").format(
                         mlflow_dir=mlflow_dir))

        client = docker.from_env()
        try:
            client.images.build(tag='mlflow-docker-example',
                                forcerm=True,
                                nocache=True,
                                dockerfile='Dockerfile',
                                path=cwd)
        except BuildError as build_error:
            for chunk in build_error.build_log:
                print(chunk)
            raise build_error
        except APIError as api_error:
            print(api_error.explanation)
            raise api_error
Example #9
def test_serve_gunicorn_opts(iris_data, sk_model):
    if sys.platform == "win32":
        pytest.skip(
            "This test requires gunicorn which is not available on windows.")
    with kiwi.start_run() as active_run:
        kiwi.sklearn.log_model(sk_model,
                               "model",
                               registered_model_name="imlegit")
        run_id = active_run.info.run_id

    model_uris = [
        "models:/{name}/{stage}".format(name="imlegit", stage="None"),
        "runs:/{run_id}/model".format(run_id=run_id)
    ]
    for model_uri in model_uris:
        with TempDir() as tpm:
            output_file_path = tpm.path("stoudt")
            with open(output_file_path, "w") as output_file:
                x, _ = iris_data
                scoring_response = pyfunc_serve_and_score_model(
                    model_uri,
                    pd.DataFrame(x),
                    content_type=CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                    stdout=output_file,
                    extra_args=["-w", "3"])
            with open(output_file_path, "r") as output_file:
                stdout = output_file.read()
        actual = pd.read_json(scoring_response.content, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)
        expected_command_pattern = re.compile(
            ("gunicorn.*-w 3.*mlflow.pyfunc.scoring_server.wsgi:app"))
        assert expected_command_pattern.search(stdout) is not None
Example #10
def test_signature_and_examples_are_saved_correctly(iris_data,
                                                    main_scoped_model_class):
    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    data = iris_data
    signature_ = infer_signature(*data)
    example_ = data[0][:3, ]
    for signature in (None, signature_):
        for example in (None, example_):
            with TempDir() as tmp:
                path = tmp.path("model")
                kiwi.pyfunc.save_model(
                    path=path,
                    artifacts={},
                    python_model=main_scoped_model_class(test_predict),
                    signature=signature,
                    input_example=example)
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model,
                                              path) == example).all())
Example #11
def _upload_s3(local_model_path, bucket, prefix, region_name, s3_client):
    """
    Upload dir to S3 as .tar.gz.
    :param local_model_path: Local path to a dir.
    :param bucket: S3 bucket where to store the data.
    :param prefix: Path within the bucket.
    :param region_name: The AWS region in which to upload data to S3.
    :param s3_client: A boto3 client for S3.
    :return: S3 path of the uploaded artifact.
    """
    import boto3
    sess = boto3.Session(region_name=region_name)
    with TempDir() as tmp:
        model_data_file = tmp.path("model.tar.gz")
        _make_tarfile(model_data_file, local_model_path)
        with open(model_data_file, 'rb') as fobj:
            key = os.path.join(prefix, 'model.tar.gz')
            obj = sess.resource('s3').Bucket(bucket).Object(key)
            obj.upload_fileobj(fobj)
            response = s3_client.put_object_tagging(
                Bucket=bucket,
                Key=key,
                Tagging={'TagSet': [
                    {
                        'Key': 'SageMaker',
                        'Value': 'true'
                    },
                ]})
            _logger.info('tag response: %s', response)
            return '{}/{}/{}'.format(s3_client.meta.endpoint_url, bucket, key)
Example #12
def test_cli_build_image_with_relative_model_path_calls_expected_azure_routines(
        sklearn_model):
    with TempDir(chdr=True):
        model_path = "model"
        kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)

        with AzureMLMocks() as aml_mocks:
            result = CliRunner(env={
                "LC_ALL": "en_US.UTF-8",
                "LANG": "en_US.UTF-8"
            }).invoke(kiwi.azureml.cli.commands, [
                'build-image',
                '-m',
                model_path,
                '-w',
                'test_workspace',
                '-i',
                'image_name',
                '-n',
                'model_name',
            ])
            assert result.exit_code == 0

            assert aml_mocks["register_model"].call_count == 1
            assert aml_mocks["create_image"].call_count == 1
            assert aml_mocks["load_workspace"].call_count == 1
Example #13
def test_build_image_includes_mlflow_home_as_file_dependency_if_specified(
        sklearn_model, model_path):
    def mock_create_dockerfile(output_path, *args, **kwargs):
        # pylint: disable=unused-argument
        with open(output_path, "w") as f:
            f.write("Dockerfile contents")

    kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)
    with AzureMLMocks() as aml_mocks, TempDir() as tmp,\
            mock.patch("mlflow.azureml._create_dockerfile") as create_dockerfile_mock:
        create_dockerfile_mock.side_effect = mock_create_dockerfile

        # Write a mock `setup.py` file to the mlflow home path so that it will be recognized
        # as a viable MLflow source directory during the image build process
        mlflow_home = tmp.path()
        with open(os.path.join(mlflow_home, "setup.py"), "w") as f:
            f.write("setup instructions")

        workspace = get_azure_workspace()
        kiwi.azureml.build_image(model_uri=model_path,
                                 workspace=workspace,
                                 mlflow_home=mlflow_home)

        assert len(create_dockerfile_mock.call_args_list) == 1
        _, create_dockerfile_kwargs = create_dockerfile_mock.call_args_list[0]
        # The path to MLflow that is referenced by the Docker container may differ from the
        # user-specified `mlflow_home` path if the directory is copied before image building
        # for safety
        dockerfile_mlflow_path = create_dockerfile_kwargs["mlflow_path"]

        create_image_call_args = aml_mocks["create_image"].call_args_list
        assert len(create_image_call_args) == 1
        _, create_image_call_kwargs = create_image_call_args[0]
        image_config = create_image_call_kwargs["image_config"]
        assert dockerfile_mlflow_path in image_config.dependencies
Example #14
def test_model_log(h2o_iris_model):
    h2o_model = h2o_iris_model.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model,
                                   artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                h2o_model_loaded = kiwi.h2o.load_model(model_uri=model_uri)
                assert all(
                    h2o_model_loaded.predict(h2o_iris_model.inference_data).
                    as_data_frame() == h2o_model.predict(
                        h2o_iris_model.inference_data).as_data_frame())
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
Example #15
def test_get_experiment_by_name():
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)

        experiment = kiwi.get_experiment_by_name(name)
        assert experiment.experiment_id == exp_id
Example #16
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")
    x, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(x).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        test_model_conda_path = tmp.path("conda.yml")
        # create env with old mlflow!
        _mlflow_conda_env(path=test_model_conda_path,
                          additional_pip_deps=[
                              "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
                          ])
        pyfunc.save_model(path=test_model_path,
                          loader_module=test_pyfunc.__name__.split(".")[-1],
                          code_path=[test_pyfunc.__file__],
                          conda_env=test_model_conda_path)
        # explicit json format with orient records
        p = subprocess.Popen([
            "mlflow", "models", "predict", "-m",
            path_to_local_file_uri(test_model_path), "-i", input_records_path,
            "-o", output_json_path, "-t", "json", "--json-format", "records"
        ] + no_conda)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = test_pyfunc.PyFuncTestModel(check_version=False).predict(
            df=pd.DataFrame(x))
        assert all(expected == actual)
Example #17
def test_path_params():
    data_file = "s3://path.test/resources/data_file.csv"
    defaults = {
        "constants": {"type": "uri", "default": "s3://path.test/b1"},
        "data": {"type": "path", "default": data_file}
    }
    entry_point = EntryPoint("entry_point_name", defaults, "command_name script.py")

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        final_1, extra_1 = entry_point.compute_parameters({}, None)
        assert (final_1 == {"constants": "s3://path.test/b1", "data": data_file})
        assert (extra_1 == {})
        assert download_uri_mock.call_count == 0

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        user_2 = {"alpha": 0.001, "constants": "s3://path.test/b_two"}
        final_2, extra_2 = entry_point.compute_parameters(user_2, None)
        assert (final_2 == {"constants": "s3://path.test/b_two", "data": data_file})
        assert (extra_2 == {"alpha": "0.001"})
        assert download_uri_mock.call_count == 0

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock, TempDir() as tmp:
        dest_path = tmp.path()
        download_path = "%s/data_file.csv" % dest_path
        download_uri_mock.return_value = download_path
        user_3 = {"alpha": 0.001}
        final_3, extra_3 = entry_point.compute_parameters(user_3, dest_path)
        assert (final_3 == {"constants": "s3://path.test/b1",
                            "data": download_path})
        assert (extra_3 == {"alpha": "0.001"})
        assert download_uri_mock.call_count == 1

    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock, TempDir() as tmp:
        dest_path = tmp.path()
        download_path = "%s/images.tgz" % dest_path
        download_uri_mock.return_value = download_path
        user_4 = {"data": "s3://another.example.test/data_stash/images.tgz"}
        final_4, extra_4 = entry_point.compute_parameters(user_4, dest_path)
        assert (final_4 == {"constants": "s3://path.test/b1",
                            "data": download_path})
        assert (extra_4 == {})
        assert download_uri_mock.call_count == 1
Example #18
def test_dir_create():
    with TempDir() as tmp:
        file_path = tmp.path("test_file.txt")
        create_dir = tmp.path("test_dir2/")
        with open(file_path, 'a') as f:
            f.write("testing")
        name = _copy_file_or_tree(file_path, file_path, create_dir)
        assert filecmp.cmp(file_path, name)
Example #19
def test_hidden_files_are_logged_correctly(local_artifact_repo):
    with TempDir() as local_dir:
        hidden_file = local_dir.path(".mystery")
        with open(hidden_file, "w") as f:
            f.write("42")
        local_artifact_repo.log_artifact(hidden_file)
        assert open(
            local_artifact_repo.download_artifacts(".mystery")).read() == "42"
Example #20
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of type `path`.
    """
    project = load_project()
    entry_point = project.get_entry_point("line_count")
    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        download_uri_mock.return_value = 0
        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_path},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

            params, _ = entry_point.compute_parameters(
                user_parameters={"path": path_to_local_file_uri(local_path)},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp, pytest.raises(ExecutionException):
            dst_dir = tmp.path()
            entry_point.compute_parameters(
                user_parameters={"path": os.path.join(dst_dir, "some/nonexistent/file")},
                storage_dir=dst_dir)
        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        for i, prefix in enumerate(["dbfs:/", "s3://", "gs://"]):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                file_to_download = 'images.tgz'
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                params, _ = entry_point.compute_parameters(
                    user_parameters={"path": os.path.join(prefix, file_to_download)},
                    storage_dir=dst_dir)
                assert params["path"] == download_path
                assert download_uri_mock.call_count == i + 1
Example #21
def test_dir_copy():
    with TempDir() as tmp:
        dir_path = tmp.path("test_dir1/")
        copy_path = tmp.path("test_dir2")
        os.mkdir(dir_path)
        with open(os.path.join(dir_path, "test_file.txt"), 'a') as f:
            f.write("testing")
        _copy_file_or_tree(dir_path, copy_path, "")
        assert filecmp.dircmp(dir_path, copy_path)
Example #22
def test_file_copy():
    with TempDir() as tmp:
        file_path = tmp.path("test_file.txt")
        copy_path = tmp.path("test_dir1/")
        os.mkdir(copy_path)
        with open(file_path, 'a') as f:
            f.write("testing")
        _copy_file_or_tree(file_path, copy_path, "")
        assert filecmp.cmp(file_path, os.path.join(copy_path, "test_file.txt"))
Example #23
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id():
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, 1e6)
        exp_id = kiwi.create_experiment(name)
        assert exp_id is not None
        kiwi.set_experiment(name)
        assert _get_experiment_id() == exp_id
Example #24
def test_log_artifact_throws_exception_for_invalid_artifact_paths(
        local_artifact_repo):
    with TempDir() as local_dir:
        for bad_artifact_path in [
                "/", "//", "/tmp", "/bad_path", ".", "../terrible_path"
        ]:
            with pytest.raises(MlflowException) as exc_info:
                local_artifact_repo.log_artifact(local_dir.path(),
                                                 bad_artifact_path)
            assert "Invalid artifact path" in str(exc_info)
Example #25
    def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
        """
        Log model using supplied flavor module. If no run is active, this method will create a new
        active run.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module to save the model with. The module must have
                       the ``save_model`` function that will persist the model as a valid
                       MLflow model.
        :param registered_model_name: (Experimental) If given, create a model version under
                                      ``registered_model_name``, also creating a registered model if
                                      one with the given name does not exist.
        :param signature: (Experimental) :py:class:`ModelSignature` describes model input
                          and output :py:class:`Schema <mlflow.types.Schema>`. The model signature
                          can be :py:func:`inferred <infer_signature>` from datasets representing
                          valid model input (e.g. the training dataset) and valid model output
                          (e.g. model predictions generated on the training dataset), for example:

                          .. code-block:: python

                            from mlflow.models.signature import infer_signature
                            train = df.drop_column("target_label")
                            signature = infer_signature(train, model.predict(train))

        :param input_example: (Experimental) Input example provides one or several examples of
                              valid model input. The example can be used as a hint of what data to
                              feed the model. The given example will be converted to a Pandas
                              DataFrame and then serialized to json using the Pandas split-oriented
                              format. Bytes are base64-encoded.

        :param kwargs: Extra args passed to the model flavor.
        """
        with TempDir() as tmp:
            local_path = tmp.path("model")
            run_id = kiwi.tracking.fluent._get_or_start_run().info.run_id
            mlflow_model = cls(artifact_path=artifact_path, run_id=run_id)
            flavor.save_model(path=local_path, mlflow_model=mlflow_model,
                              **kwargs)
            kiwi.tracking.fluent.log_artifacts(local_path, artifact_path)
            try:
                kiwi.tracking.fluent._record_logged_model(mlflow_model)
            except MlflowException:
                # We need to swallow all mlflow exceptions to maintain backwards compatibility with
                # older tracking servers. Only print out a warning for now.
                _logger.warning(
                    "Logging model metadata to the tracking server has failed, possibly due to an "
                    "older server version. The model artifacts have been logged successfully "
                    "under %s. In addition to exporting model artifacts, MLflow clients 1.7.0 and "
                    "above attempt to record model metadata to the tracking store. If logging to "
                    "an mlflow server via REST, consider upgrading the server version to MLflow "
                    "1.7.0 or above.", kiwi.get_artifact_uri())
            if registered_model_name is not None:
                run_id = kiwi.tracking.fluent.active_run().info.run_id
                kiwi.register_model("runs:/%s/%s" % (run_id, artifact_path),
                                    registered_model_name)
Example #26
def test_log_model_no_registered_model_name(xgb_model):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with kiwi.start_run(), register_model_patch, TempDir(
            chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["xgboost"])
        kiwi.xgboost.log_model(xgb_model=xgb_model.model,
                               artifact_path=artifact_path,
                               conda_env=conda_env)
        kiwi.register_model.assert_not_called()
Example #27
def test_deploy_with_relative_model_path_calls_expected_azure_routines(
        sklearn_model):
    with TempDir(chdr=True):
        model_path = "model"
        kiwi.sklearn.save_model(sk_model=sklearn_model, path=model_path)
        with AzureMLMocks() as aml_mocks:
            workspace = get_azure_workspace()
            kiwi.azureml.deploy(model_uri=model_path, workspace=workspace)

            assert aml_mocks["register_model"].call_count == 1
            assert aml_mocks["model_deploy"].call_count == 1
Example #28
def test_download_artifacts():
    expected_data = b"hello"
    artifact_path = "test.txt"
    # mock hdfs
    hdfs = mock.Mock()
    hdfs.open = mock_open(read_data=expected_data)

    with TempDir() as tmp_dir:
        _download_hdfs_file(hdfs, artifact_path,
                            os.path.join(tmp_dir.path(), artifact_path))
        with open(os.path.join(tmp_dir.path(), artifact_path), "rb") as fd:
            assert expected_data == fd.read()
Example #29
def test_log_model_calls_register_model(xgb_model):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with kiwi.start_run(), register_model_patch, TempDir(
            chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["xgboost"])
        kiwi.xgboost.log_model(xgb_model=xgb_model.model,
                               artifact_path=artifact_path,
                               conda_env=conda_env,
                               registered_model_name="AdsModel1")
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id, artifact_path=artifact_path)
        kiwi.register_model.assert_called_once_with(model_uri, "AdsModel1")
Example #30
def test_uri_parameter():
    """Tests parameter resolution for parameters of type `uri`."""
    project = load_project()
    entry_point = project.get_entry_point("download_uri")
    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock, TempDir() as tmp:
        dst_dir = tmp.path()
        # Test that we don't attempt to locally download parameters of type URI
        entry_point.compute_command(user_parameters={"uri": "file://%s" % dst_dir},
                                    storage_dir=dst_dir)
        assert download_uri_mock.call_count == 0
        # Test that we raise an exception if a local path is passed to a parameter of type URI
        with pytest.raises(ExecutionException):
            entry_point.compute_command(user_parameters={"uri": dst_dir}, storage_dir=dst_dir)