Пример #1
0
 def list_artifacts(self, path=None):
     """
     List the artifacts stored directly under ``path`` in this DBFS-backed repository.

     :param path: Artifact path relative to the repository root. A falsy value (``None`` or
                  ``''``) lists the repository root.
     :return: List of ``FileInfo`` entries sorted by path. The list is empty when the DBFS
              API answers with ``RESOURCE_DOES_NOT_EXIST`` or when ``path`` refers to a file
              rather than a directory.
     :raises MlflowException: if the body of the list response is not valid JSON.
     """
     dbfs_path = self._get_dbfs_path(path or '')
     response = self._dbfs_list_api({'path': dbfs_path})
     try:
         listing = json.loads(response.text)
     except ValueError:
         raise MlflowException(
             "API request to list files under DBFS path %s failed with status code %s. "
             "Response body: %s" % (dbfs_path, response.status_code, response.text))
     artifact_prefix = strip_prefix(self.artifact_uri, 'dbfs:')
     if listing.get('error_code', None) == RESOURCE_DOES_NOT_EXIST:
         return []
     infos = []
     # /api/2.0/dbfs/list omits the 'files' key for empty directories, hence the default.
     for entry in listing.get('files', []):
         relative_path = strip_prefix(entry['path'], artifact_prefix + '/')
         # Listing a file yields a single entry named like `path` itself; the list_artifacts
         # contract is to report no children in that case.
         if relative_path == path:
             return []
         is_dir = entry['is_dir']
         size = entry['file_size'] if not is_dir else None
         infos.append(FileInfo(relative_path, is_dir, size))
     infos.sort(key=lambda info: info.path)
     return infos
def copy_artifacts(artifact_uri, artifact_path):
    """
    Copy every file found under an artifact directory on the local ``/dbfs`` mount.

    The directory ``/dbfs/<artifact_uri without 'dbfs:/'>/<artifact_path>`` is walked
    recursively and each file is handed to ``_copy_artifact`` together with the artifact
    sub-directory it was found in.

    :param artifact_uri: Root artifact URI. Trailing ``/`` characters and a leading
                         ``dbfs:/`` are stripped when building the local directory.
    :param artifact_path: Path, relative to ``artifact_uri``, of the directory to copy.
                          ``None`` or ``''`` selects the root itself.
    """
    # Normalise before the first use: the value is passed to strip_prefix right below, so a
    # missing (None) path has to become '' first.
    artifact_path = artifact_path or ''
    local_dir = "/dbfs/%s/%s" % (strip_prefix(
        artifact_uri.rstrip('/'), 'dbfs:/'), strip_prefix(artifact_path, '/'))
    for (dirpath, _, filenames) in os.walk(local_dir):
        artifact_subdir = artifact_path
        if dirpath != local_dir:
            # Nested directory: express its location as a POSIX-style artifact path
            # underneath `artifact_path`.
            rel_path = os.path.relpath(dirpath, local_dir)
            rel_path = relative_path_to_artifact_path(rel_path)
            artifact_subdir = posixpath.join(artifact_path, rel_path)
        for name in filenames:
            file_path = os.path.join(dirpath, name)
            _copy_artifact(file_path, artifact_uri, artifact_subdir)
Пример #3
0
def dbfs_artifact_repo_factory(artifact_uri):
    """
    Choose the ArtifactRepository implementation for a ``dbfs:/<path>`` artifact URI.

    DBFS-backed artifact storage can only be used together with the RestStore.

    The repository is selected in this order:

    1. ``DatabricksArtifactRepository`` (capable of storing access controlled artifacts)
       when the URI has the form
       ``dbfs:/databricks/mlflow-tracking/<Exp-ID>/<Run-ID>/<path>``.
    2. ``LocalArtifactRepository`` on ``file:///dbfs/...`` when the DBFS FUSE mount is
       available, the ``USE_FUSE_ENV_VAR`` environment variable is not ``"false"``
       (case-insensitive) and the URI does not start with
       ``dbfs:/databricks/mlflow-registry``.
    3. ``DbfsRestArtifactRepository`` otherwise.

    :param artifact_uri: DBFS root artifact URI (string).
    :return: Subclass of ArtifactRepository capable of storing artifacts on DBFS.
    :raises MlflowException: if the scheme of ``artifact_uri`` is not ``dbfs``.
    """
    cleaned_artifact_uri = artifact_uri.rstrip('/')
    if get_uri_scheme(artifact_uri) != 'dbfs':
        raise MlflowException(
            "DBFS URI must be of the form "
            "dbfs:/<path>, but received {uri}".format(uri=artifact_uri))

    if is_databricks_acled_artifacts_uri(artifact_uri):
        return DatabricksArtifactRepository(cleaned_artifact_uri)

    # Evaluated lazily, left to right, so the FUSE probe is always the first check made.
    fuse_usable = (
        mlflow.utils.databricks_utils.is_dbfs_fuse_available()
        and os.environ.get(USE_FUSE_ENV_VAR, "").lower() != "false"
        and not artifact_uri.startswith("dbfs:/databricks/mlflow-registry")
    )
    if not fuse_usable:
        return DbfsRestArtifactRepository(cleaned_artifact_uri)

    # With the FUSE mount, artifacts are written straight to /dbfs/... through the local
    # filesystem APIs.
    mount_relative_path = strip_prefix(cleaned_artifact_uri, "dbfs:/")
    return LocalArtifactRepository("file:///dbfs/{}".format(mount_relative_path))
Пример #4
0
 def list_artifacts(self, path=None):
     """
     List the artifacts stored directly under ``path`` in this DBFS-backed repository.

     :param path: Artifact path relative to the repository root. A falsy value (``None`` or
                  ``''``) lists the repository root.
     :return: List of ``FileInfo`` entries sorted by path. The list is empty when the DBFS
              API answers with ``RESOURCE_DOES_NOT_EXIST`` or when ``path`` refers to a file
              rather than a directory.
     """
     dbfs_list_json = {'path': self._get_dbfs_path(path or '')}
     response = _dbfs_list_api(dbfs_list_json, self.http_request_kwargs)
     json_response = json.loads(response.text)
     if json_response.get('error_code', None) == RESOURCE_DOES_NOT_EXIST:
         return []
     artifact_prefix = strip_prefix(self.artifact_uri, 'dbfs:')
     infos = []
     # /api/2.0/dbfs/list will not have the 'files' key in the response for empty directories.
     for dbfs_file in json_response.get('files', []):
         stripped_path = strip_prefix(dbfs_file['path'], artifact_prefix + '/')
         # If `path` is a file, the DBFS list API returns a single list element with the
         # same name as `path`. A file has no children, so report an empty listing instead
         # of returning the file as its own child.
         if stripped_path == path:
             return []
         is_dir = dbfs_file['is_dir']
         artifact_size = None if is_dir else dbfs_file['file_size']
         infos.append(FileInfo(stripped_path, is_dir, artifact_size))
     return sorted(infos, key=lambda f: f.path)
Пример #5
0
def dbfs_artifact_repo_factory(artifact_uri):
    """
    Choose the ArtifactRepository implementation for a ``dbfs:/<path>`` artifact URI.

    DBFS-backed artifact storage can only be used together with the RestStore.

    A ``LocalArtifactRepository`` on ``file:///dbfs/...`` is returned when the DBFS FUSE
    mount is available and the ``USE_FUSE_ENV_VAR`` environment variable is not ``"false"``
    (case-insensitive); otherwise a ``DbfsRestArtifactRepository`` is returned.

    :param artifact_uri: DBFS root artifact URI (string).
    :return: Subclass of ArtifactRepository capable of storing artifacts on DBFS.
    """
    cleaned_artifact_uri = artifact_uri.rstrip('/')
    # Evaluated lazily, left to right: the environment is only consulted when the mount exists.
    fuse_usable = (
        mlflow.utils.databricks_utils.is_dbfs_fuse_available()
        and os.environ.get(USE_FUSE_ENV_VAR, "").lower() != "false"
    )
    if not fuse_usable:
        return DbfsRestArtifactRepository(cleaned_artifact_uri)

    # With the FUSE mount, artifacts are written straight to /dbfs/... through the local
    # filesystem APIs.
    mount_relative_path = strip_prefix(cleaned_artifact_uri, "dbfs:/")
    return LocalArtifactRepository("file:///dbfs/{}".format(mount_relative_path))
Пример #6
0
def dbfs_artifact_repo_factory(artifact_uri):
    """
    Choose the ArtifactRepository implementation for a DBFS artifact URI.

    Accepted URIs have the form ``dbfs:/<path>`` or ``dbfs://profile@databricks/<path>``.
    DBFS-backed artifact storage can only be used together with the RestStore.

    The repository is selected in this order:

    1. ``DatabricksArtifactRepository`` (capable of storing access controlled artifacts)
       when the URI has the form
       ``dbfs:/databricks/mlflow-tracking/<Exp-ID>/<Run-ID>/<path>``.
    2. ``LocalArtifactRepository`` on ``file:///dbfs/...`` when the DBFS FUSE mount is
       available, the ``USE_FUSE_ENV_VAR`` environment variable is not ``"false"``
       (case-insensitive), the URI is not a model registry artifacts URI, and the URI
       carries either no Databricks profile or the ``databricks`` profile.
    3. ``DbfsRestArtifactRepository`` otherwise.

    :param artifact_uri: DBFS root artifact URI (string).
    :return: Subclass of ArtifactRepository capable of storing artifacts on DBFS.
    :raises MlflowException: if ``is_valid_dbfs_uri`` rejects ``artifact_uri``.
    """
    if not is_valid_dbfs_uri(artifact_uri):
        raise MlflowException(
            "DBFS URI must be of the form dbfs:/<path> or "
            "dbfs://profile@databricks/<path>, but received " + artifact_uri
        )

    cleaned_artifact_uri = artifact_uri.rstrip("/")
    db_profile_uri = get_databricks_profile_uri_from_artifact_uri(cleaned_artifact_uri)

    if is_databricks_acled_artifacts_uri(artifact_uri):
        return DatabricksArtifactRepository(cleaned_artifact_uri)

    # Evaluated lazily, left to right, so the FUSE probe is always the first check made.
    fuse_usable = (
        mlflow.utils.databricks_utils.is_dbfs_fuse_available()
        and os.environ.get(USE_FUSE_ENV_VAR, "").lower() != "false"
        and not is_databricks_model_registry_artifacts_uri(artifact_uri)
        and (db_profile_uri is None or db_profile_uri == "databricks")
    )
    if not fuse_usable:
        return DbfsRestArtifactRepository(cleaned_artifact_uri)

    # With the FUSE mount, artifacts are written straight to /dbfs/... through the local
    # filesystem APIs.
    # Note: a named Databricks profile may in fact point at the current workspace, but no
    # attempt is made to detect that; `databricks` is taken to mean the current workspace.
    # Going through `DbfsRestArtifactRepository` for the current workspace's DBFS should
    # still work; it just may be slower.
    profile_free_uri = remove_databricks_profile_info_from_artifact_uri(cleaned_artifact_uri)
    mount_relative_path = strip_prefix(profile_free_uri, "dbfs:/")
    return LocalArtifactRepository("file:///dbfs/{}".format(mount_relative_path))
Пример #7
0
def test_strip_prefix(original, prefix, expected):
    """Check that ``strip_prefix(original, prefix)`` yields ``expected``."""
    actual = strip_prefix(original, prefix)
    assert actual == expected
Пример #8
0
 def _get_dbfs_path(self, artifact_path):
     """
     Build the ``/<root>/<artifact_path>`` DBFS path for an artifact of this repository.

     :param artifact_path: Artifact path relative to the repository root; it is passed
                           through ``strip_prefix`` with ``'/'``.
     :return: String made of ``self.artifact_uri`` (passed through ``strip_prefix`` with
              ``'dbfs:/'``) and the artifact path, each preceded by a ``/``.
     """
     root = strip_prefix(self.artifact_uri, 'dbfs:/')
     relative = strip_prefix(artifact_path, '/')
     return '/{}/{}'.format(root, relative)
def _get_dbfs_endpoint(artifact_uri, artifact_path):
    """
    Build the ``/dbfs/<root>/<artifact_path>`` endpoint path for an artifact.

    :param artifact_uri: Root artifact URI; trailing ``/`` characters are removed and the
                         result is passed through ``strip_prefix`` with ``'dbfs:/'``.
    :param artifact_path: Artifact path relative to the root; it is passed through
                          ``strip_prefix`` with ``'/'``.
    :return: String of the form ``/dbfs/<root>/<artifact_path>``.
    """
    root = strip_prefix(artifact_uri.rstrip('/'), 'dbfs:/')
    relative = strip_prefix(artifact_path, '/')
    return "/dbfs/{}/{}".format(root, relative)
Пример #10
0
 def _get_dbfs_path(self, artifact_path):
     """
     Build the ``/<root>/<artifact_path>`` DBFS path for an artifact of this repository.

     :param artifact_path: Artifact path relative to the repository root; it is passed
                           through ``strip_prefix`` with ``"/"``.
     :return: String made of ``self.artifact_uri`` (passed through ``strip_prefix`` with
              ``"dbfs:/"``) and the artifact path, each preceded by a ``/``.
     """
     segments = (
         strip_prefix(self.artifact_uri, "dbfs:/"),
         strip_prefix(artifact_path, "/"),
     )
     return "/{}/{}".format(*segments)