Exemplo n.º 1
0
 def log_artifacts(self, local_dir, artifact_path=None):
     """Copy the contents of ``local_dir`` under this repository's artifact directory.

     :param local_dir: Local directory whose contents are copied.
     :param artifact_path: Optional sub-path (posix format) below the artifact root.
     """
     verify_artifact_path(artifact_path)
     # Posix paths work fine on Windows too, but normalize defensively.
     normalized = os.path.normpath(artifact_path) if artifact_path else None
     dest_dir = self.artifact_dir if normalized is None else \
         os.path.join(self.artifact_dir, normalized)
     if not os.path.exists(dest_dir):
         mkdir(dest_dir)
     dir_util.copy_tree(src=local_dir, dst=dest_dir,
                        preserve_mode=0, preserve_times=0)
Exemplo n.º 2
0
    def log_artifact(self, local_file, artifact_path=None):
        """Copy a single file into this repository's artifact directory.

        :param local_file: Path of the local file to copy.
        :param artifact_path: Optional sub-path (posix format) below the artifact root.
        """
        verify_artifact_path(artifact_path)
        # Posix paths work fine on Windows too, but normalize defensively.
        target_dir = self.artifact_dir
        if artifact_path:
            target_dir = os.path.join(self.artifact_dir,
                                      os.path.normpath(artifact_path))
        if not os.path.exists(target_dir):
            mkdir(target_dir)
        destination = os.path.join(target_dir, os.path.basename(local_file))
        shutil.copyfile(local_file, destination)
Exemplo n.º 3
0
    def __init__(self, db_uri, default_artifact_root):
        """
        Create a database backed store.

        :param db_uri: The SQLAlchemy database URI string to connect to the database. See
                       the `SQLAlchemy docs
                       <https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>`_
                       for format specifications. Mlflow supports the dialects ``mysql``,
                       ``mssql``, ``sqlite``, and ``postgresql``.
        :param default_artifact_root: Path/URI to location suitable for large data (such as a blob
                                      store object, DBFS path, or shared NFS file system).
        """
        super(SqlAlchemyStore, self).__init__()
        self.db_uri = db_uri
        # Dialect name (e.g. "sqlite") drives session-maker behavior below.
        self.db_type = extract_db_type_from_uri(db_uri)
        self.artifact_root_uri = default_artifact_root
        self.engine = kiwi.store.db.utils.create_sqlalchemy_engine(db_uri)
        # On a completely fresh MLflow installation against an empty database (verify database
        # emptiness by checking that 'experiments' etc aren't in the list of table names), run all
        # DB migrations
        expected_tables = [
            SqlExperiment.__tablename__,
            SqlRun.__tablename__,
            SqlMetric.__tablename__,
            SqlParam.__tablename__,
            SqlTag.__tablename__,
            SqlExperimentTag.__tablename__,
            SqlLatestMetric.__tablename__,
        ]
        inspected_tables = set(
            sqlalchemy.inspect(self.engine).get_table_names())
        # If any expected table is missing, (re-)initialize the schema.
        if any([table not in inspected_tables for table in expected_tables]):
            kiwi.store.db.utils._initialize_tables(self.engine)
        Base.metadata.bind = self.engine
        SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
        self.ManagedSessionMaker = kiwi.store.db.utils._get_managed_session_maker(
            SessionMaker, self.db_type)
        # Verify the (possibly just-migrated) schema matches expectations
        # before serving any requests.
        kiwi.store.db.utils._verify_schema(self.engine)

        # For local artifact roots, eagerly create the directory so later
        # artifact logging does not fail on a missing path.
        if is_local_uri(default_artifact_root):
            mkdir(local_file_uri_to_path(default_artifact_root))

        # A fresh store gets the default experiment created up front.
        if len(self.list_experiments()) == 0:
            with self.ManagedSessionMaker() as session:
                self._create_default_experiment(session)
Exemplo n.º 4
0
 def __init__(self, root_directory=None, artifact_root_uri=None):
     """
     Create a new FileStore with the given root directory and a given default artifact root URI.
     """
     super(FileStore, self).__init__()
     root_directory = root_directory or _default_root_dir()
     self.root_directory = local_file_uri_to_path(root_directory)
     self.artifact_root_uri = (artifact_root_uri
                               or path_to_local_file_uri(self.root_directory))
     self.trash_folder = os.path.join(self.root_directory,
                                      FileStore.TRASH_FOLDER_NAME)
     # First use: create the root directory and seed the default experiment.
     if not exists(self.root_directory):
         mkdir(self.root_directory)
         self._create_experiment_with_id(
             name=Experiment.DEFAULT_EXPERIMENT_NAME,
             experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
             artifact_uri=None)
     # The trash folder may be missing even on an existing store.
     if not exists(self.trash_folder):
         mkdir(self.trash_folder)
Exemplo n.º 5
0
    def download_artifacts(self, artifact_path, dst_path=None):
        """
        Download an artifact file or directory from HDFS to the local filesystem
        and return its local path. The caller is responsible for managing the
        lifecycle of the downloaded artifacts.

        (self.path contains the base path - hdfs:/some/path/run_id/artifacts)

        :param artifact_path: Relative source path to the desired artifacts file or directory.
        :param dst_path: Absolute path of the local filesystem destination directory to which
                         to download the specified artifacts. This directory must already
                         exist. If unspecified, the artifacts will be downloaded to a new,
                         uniquely-named directory on the local filesystem.

        :return: Absolute path of the local filesystem location containing the
                 downloaded artifacts (file or directory).
        """
        remote_base = _resolve_base_path(self.path, artifact_path)
        local_dir = _tmp_dir(dst_path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            # Single file: fetch it directly and return its local path.
            if not hdfs.isdir(remote_base):
                local_path = os.path.join(local_dir,
                                          os.path.normpath(artifact_path))
                _download_hdfs_file(hdfs, remote_base, local_path)
                return local_path

            # Directory: mirror the remote tree under local_dir.
            for remote_path, is_dir, _ in self._walk_path(hdfs, remote_base):
                rel = _relative_path_remote(remote_base, remote_path)
                local_path = os.path.join(local_dir, rel) if rel else local_dir
                if is_dir:
                    mkdir(local_path)
                else:
                    _download_hdfs_file(hdfs, remote_path, local_path)
            return local_dir
Exemplo n.º 6
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """Create the on-disk directory and meta file for a new experiment.

     Returns the experiment id. If no artifact_uri is given, one is derived
     from the store's artifact root.
     """
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri,
                                           str(experiment_id))
     self._check_root_dir()
     meta_dir = mkdir(self.root_directory, str(experiment_id))
     experiment = Experiment(experiment_id, name, artifact_uri,
                             LifecycleStage.ACTIVE)
     meta = dict(experiment)
     # Tags are persisted as individual files on disk and are not written to
     # this dict on write; keep them out of the meta file.
     meta.pop('tags')
     write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, meta)
     return experiment_id
Exemplo n.º 7
0
def test_mkdir(tmpdir):
    """Exercise file_utils.mkdir: creation, idempotence, and failure modes."""
    temp_dir = str(tmpdir)
    new_dir_name = "mkdir_test_%d" % random_int()
    file_utils.mkdir(temp_dir, new_dir_name)
    assert os.listdir(temp_dir) == [new_dir_name]

    # An invalid/unwritable path should surface as OSError.
    with pytest.raises(OSError):
        file_utils.mkdir("/   bad directory @ name ", "ouch")

    # does not raise if directory exists already
    file_utils.mkdir(temp_dir, new_dir_name)

    # raises if it exists already but is a file
    dummy_file_path = str(tmpdir.join("dummy_file"))
    # Use a context manager so the handle is closed even if creation fails
    # (the original open(...).close() left the handle open on error).
    with open(dummy_file_path, 'a'):
        pass
    with pytest.raises(OSError):
        file_utils.mkdir(dummy_file_path)
Exemplo n.º 8
0
 def create_run(self, experiment_id, user_id, start_time, tags):
     """
     Creates a run with the specified attributes.
     """
     if experiment_id is None:
         experiment_id = FileStore.DEFAULT_EXPERIMENT_ID
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id, databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(
         run_uuid=run_uuid,
         run_id=run_uuid,
         experiment_id=experiment_id,
         artifact_uri=artifact_uri,
         user_id=user_id,
         status=RunStatus.to_string(RunStatus.RUNNING),
         start_time=start_time,
         end_time=None,
         lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics,
     # parameters, and artifacts.
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
                _make_persisted_run_info_dict(run_info))
     for subfolder in (FileStore.METRICS_FOLDER_NAME,
                       FileStore.PARAMS_FOLDER_NAME,
                       FileStore.ARTIFACTS_FOLDER_NAME):
         mkdir(run_dir, subfolder)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     return self.get_run(run_id=run_uuid)