Exemplo n.º 1
0
def test_log_artifact_with_invalid_local_dir(_):
    """Logging a local file that does not exist must raise an error naming that path.

    The single positional argument is unused by the test body.
    """
    missing_local_file = '/not/existing/local/path'
    expected_message = "No such file or directory: '/not/existing/local/path'"

    repo = HdfsArtifactRepository('hdfs://host_name:8020/maybe/path')

    # ``match`` is applied as a regular-expression search against the error text.
    with pytest.raises(Exception, match=expected_message):
        repo.log_artifact(missing_local_file, 'test_hdfs/some/path')
Exemplo n.º 2
0
def test_log_artifacts(hdfs_system_mock):
    """Check that ``log_artifacts`` writes every file of a local directory tree to HDFS.

    The Kerberos ticket cache and user are supplied through environment
    variables and are expected to be forwarded to the HDFS connection call.

    :param hdfs_system_mock: mock standing in for the HDFS filesystem
        constructor (provided from outside this block).
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from unittest import mock

    kerberos_env = {
        'MLFLOW_KERBEROS_TICKET_CACHE': '/tmp/krb5cc_22222222',
        'MLFLOW_KERBEROS_USER': '******',
    }

    # patch.dict restores os.environ on exit, so these settings cannot leak
    # into tests that expect no Kerberos configuration.
    with mock.patch.dict(os.environ, kerberos_env):
        repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

        with TempDir() as root_dir:
            with open(root_dir.path("file_one.txt"), "w") as f:
                f.write('PyArrow Works once')

            os.mkdir(root_dir.path("subdir"))
            with open(root_dir.path("subdir/file_two.txt"), "w") as f:
                f.write('PyArrow Works two')

            repo.log_artifacts(root_dir._path)

            hdfs_system_mock.assert_called_once_with(
                extra_conf=None,
                host='default',
                kerb_ticket='/tmp/krb5cc_22222222',
                port=0,
                user='******')

            # Each local file must be opened for binary writing at its mirrored HDFS path.
            open_mock = hdfs_system_mock.return_value.open
            open_mock.assert_has_calls(
                calls=[
                    call('/some_path/maybe/path/file_one.txt', 'wb'),
                    call('/some_path/maybe/path/subdir/file_two.txt', 'wb'),
                ],
                any_order=True)

            # The object yielded by ``with hdfs.open(...)`` receives the file contents.
            write_mock = open_mock.return_value.__enter__.return_value.write
            write_mock.assert_has_calls(
                calls=[call(b'PyArrow Works once'),
                       call(b'PyArrow Works two')],
                any_order=True)
Exemplo n.º 3
0
def test_list_artifacts_nested(hdfs_system_mock):
    """Listing the ``model`` sub-directory should report its files as ``model/<name>`` entries."""
    # NOTE(review): the URI below has a doubled colon ("hdfs:://"); this looks
    # like a typo for "hdfs://" -- confirm before changing it.
    repo = HdfsArtifactRepository("hdfs:://host/some/path")

    # Fake ``ls`` output: three plain files, each reported with its full HDFS URI.
    ls_mock = hdfs_system_mock.return_value.ls
    ls_mock.return_value = [
        {"kind": "file", "name": "hdfs://host/some/path/model/conda.yaml", "size": 33},
        {"kind": "file", "name": "hdfs://host/some/path/model/model.pkl", "size": 33},
        {"kind": "file", "name": "hdfs://host/some/path/model/MLmodel", "size": 33},
    ]

    listed = repo.list_artifacts("model")

    wanted = [
        FileInfo("model/conda.yaml", False, 33),
        FileInfo("model/model.pkl", False, 33),
        FileInfo("model/MLmodel", False, 33),
    ]
    assert listed == wanted
Exemplo n.º 4
0
def test_list_artifacts_nested(hdfs_system_mock):
    """Listing the ``model`` sub-directory should report its files as ``model/<name>`` entries."""
    # NOTE(review): the URI below has a doubled colon ('hdfs:://'); this looks
    # like a typo for 'hdfs://' -- confirm before changing it.
    repo = HdfsArtifactRepository('hdfs:://host/some/path')

    # Fake ``ls`` output: three plain files, each reported with its full HDFS URI.
    fake_listing = [
        {'kind': 'file', 'name': 'hdfs://host/some/path/model/conda.yaml', 'size': 33},
        {'kind': 'file', 'name': 'hdfs://host/some/path/model/model.pkl', 'size': 33},
        {'kind': 'file', 'name': 'hdfs://host/some/path/model/MLmodel', 'size': 33},
    ]
    hdfs_system_mock.return_value.ls.return_value = fake_listing

    listed = repo.list_artifacts('model')

    wanted = [
        FileInfo('model/conda.yaml', False, 33),
        FileInfo('model/model.pkl', False, 33),
        FileInfo('model/MLmodel', False, 33),
    ]
    assert listed == wanted
Exemplo n.º 5
0
def test_log_artifact_with_kerberos_setup(hdfs_system_mock):
    """Check that Kerberos settings from the environment reach the HDFS connection call.

    A temporary local file is logged and the test asserts both the connection
    arguments and the bytes written to the (mocked) remote file.

    :param hdfs_system_mock: mock standing in for the HDFS filesystem
        constructor (provided from outside this block).
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from unittest import mock

    if sys.platform == "win32":
        # NOTE(review): presumably skipped because an open NamedTemporaryFile
        # cannot be reopened by name on Windows -- confirm.
        pytest.skip()

    kerberos_env = {
        "MLFLOW_KERBEROS_TICKET_CACHE": "/tmp/krb5cc_22222222",
        "MLFLOW_KERBEROS_USER": "******",
    }

    # patch.dict restores os.environ on exit, so these settings cannot leak
    # into tests that expect ``kerb_ticket=None`` / ``user=None``.
    with mock.patch.dict(os.environ, kerberos_env):
        repo = HdfsArtifactRepository("hdfs:/some/maybe/path")

        with NamedTemporaryFile() as tmp_local_file:
            tmp_local_file.write(b"PyArrow Works")
            # Rewind so the content is readable from the start when the file is logged.
            tmp_local_file.seek(0)

            repo.log_artifact(tmp_local_file.name, "test_hdfs/some/path")

            hdfs_system_mock.assert_called_once_with(
                extra_conf=None,
                host="default",
                kerb_ticket="/tmp/krb5cc_22222222",
                port=0,
                user="******",
            )

            # The object yielded by ``with hdfs.open(...)`` receives the file contents.
            remote_file = hdfs_system_mock.return_value.open.return_value.__enter__.return_value
            remote_file.write.assert_called_once_with(b"PyArrow Works")
def test_log_artifact_with_kerberos_setup(hdfs_system_mock):
    """Check that Kerberos and driver settings from the environment reach the HDFS connection call.

    A temporary local file is logged and the test asserts both the connection
    arguments (including ``driver='libhdfs3'``) and the bytes written to the
    (mocked) remote file.

    :param hdfs_system_mock: mock standing in for the HDFS filesystem
        constructor (provided from outside this block).
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from unittest import mock

    if sys.platform == 'win32':
        # NOTE(review): presumably skipped because an open NamedTemporaryFile
        # cannot be reopened by name on Windows -- confirm.
        pytest.skip()

    hdfs_env = {
        'MLFLOW_KERBEROS_TICKET_CACHE': '/tmp/krb5cc_22222222',
        'MLFLOW_KERBEROS_USER': '******',
        'MLFLOW_HDFS_DRIVER': 'libhdfs3',
    }

    # patch.dict restores os.environ on exit. Without it the driver override
    # would stay set and affect tests that expect a different driver value.
    with mock.patch.dict(os.environ, hdfs_env):
        repo = HdfsArtifactRepository('hdfs:/some/maybe/path')

        with NamedTemporaryFile() as tmp_local_file:
            tmp_local_file.write(b'PyArrow Works')
            # Rewind so the content is readable from the start when the file is logged.
            tmp_local_file.seek(0)

            repo.log_artifact(tmp_local_file.name, 'test_hdfs/some/path')

            hdfs_system_mock.assert_called_once_with(
                driver='libhdfs3',
                extra_conf=None,
                host='default',
                kerb_ticket='/tmp/krb5cc_22222222',
                port=0,
                user='******')

            # The object yielded by ``with hdfs.open(...)`` receives the file contents.
            remote_file = hdfs_system_mock.return_value.open.return_value.__enter__.return_value
            remote_file.write.assert_called_once_with(b'PyArrow Works')
Exemplo n.º 7
0
def test_list_artifacts_root(hdfs_system_mock):
    """Listing the repository root should report the ``model`` directory as a directory entry."""
    repo = HdfsArtifactRepository('hdfs://host/some/path')

    # Fake ``ls`` output: a single directory reported with its full HDFS URI.
    directory_entry = {
        'kind': 'directory',
        'name': 'hdfs://host/some/path/model',
        'size': 0,
    }
    hdfs_system_mock.return_value.ls.return_value = [directory_entry]

    listed = repo.list_artifacts()

    assert listed == [FileInfo('model', True, 0)]
Exemplo n.º 8
0
def test_list_artifacts_root(hdfs_system_mock):
    """Listing the repository root should report the ``model`` directory as a directory entry."""
    repo = HdfsArtifactRepository("hdfs://host/some/path")

    # Fake ``ls`` output: a single directory reported with its full HDFS URI.
    ls_mock = hdfs_system_mock.return_value.ls
    ls_mock.return_value = [
        {"kind": "directory", "name": "hdfs://host/some/path/model", "size": 0},
    ]

    listed = repo.list_artifacts()

    wanted = [FileInfo("model", True, 0)]
    assert listed == wanted
Exemplo n.º 9
0
def test_log_artifact_viewfs(hdfs_system_mock):
    """Logging to a ``viewfs://`` root should connect with the full URI as host and upload the file."""
    repo = HdfsArtifactRepository("viewfs://host_name/mypath")

    with TempDir() as tmp_dir:
        sample_path = tmp_dir.path("sample_file")
        with open(sample_path, "w") as handle:
            handle.write("PyArrow Works")

        repo.log_artifact(sample_path, "more_path/some")

        # Connection arguments expected for a viewfs root with no Kerberos settings.
        expected_connection = dict(
            extra_conf=None,
            host="viewfs://host_name",
            kerb_ticket=None,
            port=0,
            user=None,
        )
        hdfs_system_mock.assert_called_once_with(**expected_connection)

        upload_mock = hdfs_system_mock.return_value.upload
        upload_mock.assert_called_once_with("/mypath/more_path/some/sample_file", ANY)

        # The second positional argument of ``upload`` must be a buffered binary reader.
        positional_args = upload_mock.call_args[0]
        assert isinstance(positional_args[1], BufferedReader)
Exemplo n.º 10
0
def test_log_artifact(hdfs_system_mock):
    """Logging one file should connect to the parsed host/port and write the file bytes remotely."""
    repo = HdfsArtifactRepository('hdfs://host_name:8020/hdfs/path')

    with TempDir() as tmp_dir:
        sample_path = tmp_dir.path('sample_file')
        with open(sample_path, "w") as handle:
            handle.write('PyArrow Works')

        repo.log_artifact(sample_path, 'more_path/some')

        # Connection arguments expected when no Kerberos settings are present.
        expected_connection = dict(
            driver='libhdfs',
            extra_conf=None,
            host='host_name',
            kerb_ticket=None,
            port=8020,
            user=None,
        )
        hdfs_system_mock.assert_called_once_with(**expected_connection)

        remote_open = hdfs_system_mock.return_value.open
        remote_open.assert_called_once_with('/hdfs/path/more_path/some/sample_file', 'wb')

        # The object yielded by ``with hdfs.open(...)`` receives the file contents.
        remote_file = remote_open.return_value.__enter__.return_value
        remote_file.write.assert_called_once_with(b'PyArrow Works')
Exemplo n.º 11
0
def test_log_artifacts(hdfs_system_mock):
    """Check that ``log_artifacts`` uploads every file of a local directory tree to HDFS.

    The Kerberos ticket cache and user are supplied through environment
    variables and are expected to be forwarded to the HDFS connection call.

    :param hdfs_system_mock: mock standing in for the HDFS filesystem
        constructor (provided from outside this block).
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from unittest import mock

    kerberos_env = {
        "MLFLOW_KERBEROS_TICKET_CACHE": "/tmp/krb5cc_22222222",
        "MLFLOW_KERBEROS_USER": "******",
    }

    # patch.dict restores os.environ on exit, so these settings cannot leak
    # into tests that expect no Kerberos configuration.
    with mock.patch.dict(os.environ, kerberos_env):
        repo = HdfsArtifactRepository("hdfs:/some_path/maybe/path")

        with TempDir() as root_dir:
            with open(root_dir.path("file_one.txt"), "w") as f:
                f.write("PyArrow Works once")

            os.mkdir(root_dir.path("subdir"))
            with open(root_dir.path("subdir/file_two.txt"), "w") as f:
                f.write("PyArrow Works two")

            repo.log_artifacts(root_dir._path)

            hdfs_system_mock.assert_called_once_with(
                extra_conf=None,
                host="default",
                kerb_ticket="/tmp/krb5cc_22222222",
                port=0,
                user="******",
            )

            # Each local file must be uploaded to its mirrored HDFS path.
            upload_mock = hdfs_system_mock.return_value.upload
            upload_mock.assert_has_calls(
                calls=[
                    call("/some_path/maybe/path/file_one.txt", ANY),
                    call("/some_path/maybe/path/subdir/file_two.txt", ANY),
                ],
                any_order=True,
            )

            # Every recorded upload must have received a buffered binary reader
            # as its second positional argument.
            for positional_args, _ in upload_mock.call_args_list:
                assert isinstance(positional_args[1], BufferedReader)
Exemplo n.º 12
0
def test_log_artifact_viewfs(hdfs_system_mock):
    """Logging to a ``viewfs://`` root should connect with the full URI as host and write the file."""
    repo = HdfsArtifactRepository("viewfs://host_name/mypath")

    with TempDir() as tmp_dir:
        sample_path = tmp_dir.path("sample_file")
        with open(sample_path, "w") as handle:
            handle.write("PyArrow Works")

        repo.log_artifact(sample_path, "more_path/some")

        # Connection arguments expected for a viewfs root with no Kerberos settings.
        expected_connection = dict(
            extra_conf=None,
            host="viewfs://host_name",
            kerb_ticket=None,
            port=0,
            user=None,
        )
        hdfs_system_mock.assert_called_once_with(**expected_connection)

        remote_open = hdfs_system_mock.return_value.open
        remote_open.assert_called_once_with("/mypath/more_path/some/sample_file", "wb")

        # The object yielded by ``with hdfs.open(...)`` receives the file contents.
        remote_file = remote_open.return_value.__enter__.return_value
        remote_file.write.assert_called_once_with(b"PyArrow Works")
Exemplo n.º 13
0
def test_log_artifacts(hdfs_system_mock):
    """Check that ``log_artifacts`` writes every file of a local directory tree to HDFS.

    The Kerberos ticket cache and user are supplied through environment
    variables and are expected to be forwarded to the HDFS connection call.

    :param hdfs_system_mock: mock standing in for the HDFS filesystem
        constructor (provided from outside this block).
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from unittest import mock

    kerberos_env = {
        "MLFLOW_KERBEROS_TICKET_CACHE": "/tmp/krb5cc_22222222",
        "MLFLOW_KERBEROS_USER": "******",
    }

    # patch.dict restores os.environ on exit, so these settings cannot leak
    # into tests that expect no Kerberos configuration.
    with mock.patch.dict(os.environ, kerberos_env):
        repo = HdfsArtifactRepository("hdfs:/some_path/maybe/path")

        with TempDir() as root_dir:
            with open(root_dir.path("file_one.txt"), "w") as f:
                f.write("PyArrow Works once")

            os.mkdir(root_dir.path("subdir"))
            with open(root_dir.path("subdir/file_two.txt"), "w") as f:
                f.write("PyArrow Works two")

            repo.log_artifacts(root_dir._path)

            hdfs_system_mock.assert_called_once_with(
                extra_conf=None,
                host="default",
                kerb_ticket="/tmp/krb5cc_22222222",
                port=0,
                user="******",
            )

            # Each local file must be opened for binary writing at its mirrored HDFS path.
            open_mock = hdfs_system_mock.return_value.open
            open_mock.assert_has_calls(
                calls=[
                    call("/some_path/maybe/path/file_one.txt", "wb"),
                    call("/some_path/maybe/path/subdir/file_two.txt", "wb"),
                ],
                any_order=True,
            )

            # The object yielded by ``with hdfs.open(...)`` receives the file contents.
            write_mock = open_mock.return_value.__enter__.return_value.write
            write_mock.assert_has_calls(
                calls=[call(b"PyArrow Works once"),
                       call(b"PyArrow Works two")],
                any_order=True)
Exemplo n.º 14
0
def test_delete_artifacts(hdfs_system_mock):
    """Deleting an artifact path should issue one recursive delete on the resolved HDFS path."""
    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

    repo.delete_artifacts('artifacts')

    # The relative artifact path is expected to be joined onto the repository root.
    remote_delete = hdfs_system_mock.return_value.delete
    remote_delete.assert_called_once_with('/some_path/maybe/path/artifacts', recursive=True)
Exemplo n.º 15
0
def test_list_artifacts_empty_hdfs_dir(hdfs_system_mock):
    """Listing should return an empty list when the mocked ``exists`` call reports ``False``."""
    remote_fs = hdfs_system_mock.return_value
    remote_fs.exists.return_value = False

    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

    assert repo.list_artifacts() == []