Пример #1
0
def rename_experiment(experiment_id, new_name):
    """
    Rename an active experiment and print a confirmation message.

    Returns an error if the experiment is inactive.
    """
    tracking_store = _get_store()
    tracking_store.rename_experiment(experiment_id, new_name)
    confirmation = "Experiment with id %s has been renamed to '%s'." % (experiment_id, new_name)
    print(confirmation)
Пример #2
0
def restore_experiment(experiment_id):
    """
    Restore a deleted experiment and print a confirmation message.

    Restoring also applies to the experiment's metadata, runs and associated data. The command
    throws an error if the experiment is already active, cannot be found, or has been
    permanently deleted.
    """
    _get_store().restore_experiment(experiment_id)
    confirmation = "Experiment with id %s has been restored." % str(experiment_id)
    print(confirmation)
Пример #3
0
def delete_run(run_id):
    """
    Mark a run for deletion and echo a confirmation message.

    Return an error if the run does not exist or is already marked. A marked run can be
    restored with ``restore_run``, or permanently deleted in the backend store.
    """
    tracking_store = _get_store()
    tracking_store.delete_run(run_id)
    confirmation = "Run with ID %s has been deleted." % str(run_id)
    click.echo(confirmation)
Пример #4
0
def delete_experiment(experiment_id):
    """
    Mark an experiment for deletion and print a confirmation message.

    Return an error if the experiment does not exist or is already marked. A marked experiment
    can be restored with ``restore_experiment``, or permanently deleted in the backend store.
    """
    _get_store().delete_experiment(experiment_id)
    confirmation = "Experiment with ID %s has been deleted." % str(experiment_id)
    print(confirmation)
Пример #5
0
def list_experiments(view):
    """
    List all experiments in the configured tracking server.

    Prints a table with one row per experiment (ID, name, artifact location), sorted by row.
    Artifact locations that are not URIs are shown as absolute local paths. When ``view`` is
    empty, only active experiments are listed.
    """
    tracking_store = _get_store()
    if view:
        view_type = ViewType.from_string(view)
    else:
        view_type = ViewType.ACTIVE_ONLY

    rows = []
    for exp in tracking_store.list_experiments(view_type):
        location = exp.artifact_location
        if not is_uri(location):
            # Plain filesystem locations are displayed as absolute paths.
            location = os.path.abspath(location)
        rows.append([exp.experiment_id, exp.name, location])

    rows.sort()
    print(tabulate(rows, headers=["Experiment Id", "Name", "Artifact Location"]))
Пример #6
0
def list_artifacts(run_id, artifact_path):
    """
    Echo the artifacts directly under a run's root artifact directory, or under a
    sub-directory of it. The output is a JSON-formatted list.

    A missing ``artifact_path`` (``None``) is treated as the root directory.
    """
    if artifact_path is None:
        artifact_path = ""
    run = _get_store().get_run(run_id)
    repo = get_artifact_repository(run.info.artifact_uri)
    listing = repo.list_artifacts(artifact_path)
    click.echo(_file_infos_to_json(listing))
Пример #7
0
def download_artifacts(run_id, artifact_path):
    """
    Download an artifact file or directory of a run to a local directory.

    The printed output is the name of the file or directory on the local disk. A missing
    ``artifact_path`` (``None``) is treated as the run's root artifact directory.
    """
    if artifact_path is None:
        artifact_path = ""
    tracking_store = _get_store()
    run = tracking_store.get_run(run_id)
    repo = ArtifactRepository.from_artifact_uri(run.info.artifact_uri, tracking_store)
    local_location = repo.download_artifacts(artifact_path)
    print(local_location)
Пример #8
0
def log_artifacts(local_dir, run_id, artifact_path):
    """
    Log the files within a local directory as artifacts of a run.

    The files may optionally be placed under a run-specific artifact path; run artifacts can
    be organized into directories, so the artifact can be placed in a directory this way.
    """
    tracking_store = _get_store()
    run = tracking_store.get_run(run_id)
    repo = get_artifact_repository(run.info.artifact_uri, tracking_store)
    repo.log_artifacts(local_dir, artifact_path)
    _logger.info(
        "Logged artifact from local dir %s to artifact_path=%s",
        local_dir,
        artifact_path,
    )
Пример #9
0
def log_artifact(local_file, run_id, artifact_path):
    """
    Log a single local file as an artifact of a run.

    The file may optionally be placed under a run-specific artifact path; run artifacts can
    be organized into directories, so the artifact can be placed in a directory this way.
    """
    tracking_store = _get_store()
    run = tracking_store.get_run(run_id)
    repo = ArtifactRepository.from_artifact_uri(run.info.artifact_uri, tracking_store)
    repo.log_artifact(local_file, artifact_path)
    summary = "Logged artifact from local file %s to artifact_path=%s" % (
        local_file,
        artifact_path,
    )
    eprint(summary)
Пример #10
0
def list_run(experiment_id, view):
    """
    List all runs of the specified experiment in the configured tracking server.

    Prints a table with one row per run (start date, run name, run ID), sorted in descending
    row order. When ``view`` is empty, only active runs are listed.
    """
    tracking_store = _get_store()
    if view:
        view_type = ViewType.from_string(view)
    else:
        view_type = ViewType.ACTIVE_ONLY

    def _as_row(run):
        # The run name lives in a tag; runs without that tag get an empty name.
        tag_map = {tag.key: tag.value for tag in run.data.tags}
        run_name = tag_map.get(MLFLOW_RUN_NAME, "")
        return [conv_longdate_to_str(run.info.start_time), run_name, run.info.run_uuid]

    rows = [_as_row(run) for run in tracking_store.search_runs([experiment_id], [], view_type)]
    rows.sort(reverse=True)
    print(tabulate(rows, headers=["Date", "Name", "ID"]))
Пример #11
0
def create(experiment_name, artifact_location):
    """
    Create an experiment and print its name and ID.

    All artifacts generated by runs related to this experiment will be stored under the
    artifact location, organized under specific run_uuid sub-directories.

    Implementation of experiment and metadata store is dependent on backend storage.
    ``FileStore`` creates a folder for each experiment ID and stores metadata in ``meta.yaml``.
    Runs are stored as subfolders.
    """
    new_experiment_id = _get_store().create_experiment(experiment_name, artifact_location)
    confirmation = "Created experiment '%s' with id %s" % (experiment_name, new_experiment_id)
    print(confirmation)
Пример #12
0
def get_dbnd_store(store_uri=None, artifact_uri=None):
    """
    Build a ``DatabandStore`` from a composite store URI.

    ``store_uri`` is split by ``parse_composite_uri`` into the DBND tracking store URL and an
    optional URI to duplicate tracking to. When a duplication URI is present, a second store
    is obtained from ``mlflow.tracking._get_store`` using that URI and ``artifact_uri``.

    :param store_uri: Composite URI handed to ``parse_composite_uri``.
    :param artifact_uri: Artifact URI forwarded to ``_get_store`` for the duplication store.
    :return: ``DatabandStore`` wrapping the DBND tracking API and the duplication store
             (``None`` when no duplication target is configured).
    """
    dbnd_store_url, duplicate_tracking_to = parse_composite_uri(store_uri)

    logger.info("MLFlow DBND Tracking Store url: {}".format(dbnd_store_url))
    logger.info("MLFlow DBND Tracking Store duplication to: {}".format(duplicate_tracking_to))

    if duplicate_tracking_to is None:
        duplication_store = None
    else:
        # avoid cyclic imports during `_tracking_store_registry.register_entrypoints()`
        from mlflow.tracking import _get_store

        duplication_store = _get_store(duplicate_tracking_to, artifact_uri)

    tracking_api = TrackingStoreApi(channel=TrackingApiClient(dbnd_store_url))
    return DatabandStore(tracking_api, duplication_store)
Пример #13
0
def delete_experiment(experiment_id):
    """
    Mark an active experiment for deletion and print a confirmation message.

    Deletion also applies to the experiment's metadata, runs and associated data, and to
    artifacts if they are stored in the default location. Use the ``list`` command to view the
    artifact location. The command throws an error if the experiment is not found or is
    already marked for deletion.

    Experiments marked for deletion can be restored using the ``restore`` command, unless they
    are permanently deleted.

    The specific implementation of deletion depends on the backend store. ``FileStore`` moves
    experiments marked for deletion under a ``.trash`` folder under the main folder used to
    instantiate ``FileStore``. Experiments marked for deletion can be permanently deleted by
    clearing the ``.trash`` folder. It is recommended to use a ``cron`` job or an alternate
    workflow mechanism to clear the ``.trash`` folder.
    """
    tracking_store = _get_store()
    tracking_store.delete_experiment(experiment_id)
    confirmation = "Experiment with ID %s has been deleted." % str(experiment_id)
    print(confirmation)
Пример #14
0
def gc(backend_store_uri, run_ids):
    """
    Permanently delete runs in the `deleted` lifecycle stage from the specified backend store.
    This command deletes all artifacts and metadata associated with the specified runs.

    ``run_ids`` is a comma-separated string of run IDs; when it is empty, every run the
    backend store reports as deleted is removed. Runs are processed one at a time, and an
    ``MlflowException`` is raised on the first run that is not in the `deleted` stage.
    """
    backend_store = _get_store(backend_store_uri, None)
    targets = run_ids.split(',') if run_ids else backend_store._get_deleted_runs()

    for run_id in targets:
        run = backend_store.get_run(run_id)
        if run.info.lifecycle_stage != LifecycleStage.DELETED:
            error_template = (
                'Run {} is not in `deleted` lifecycle stage. Only runs in '
                '`deleted` lifecycle stage can be deleted.'
            )
            raise MlflowException(error_template.format(run_id))

        # Remove the artifacts first, then the run's metadata.
        get_artifact_repository(run.info.artifact_uri).delete_artifacts()
        backend_store._hard_delete_run(run_id)
        print("Run with ID %s has been permanently deleted." % str(run_id))
Пример #15
0
def download_artifacts(run_id, artifact_path, artifact_uri):
    """
    Download an artifact file or directory to a local directory.
    The output is the name of the file or directory on the local disk.

    Either ``--run-id`` or ``--artifact-uri`` must be provided. When an artifact URI is given
    it is downloaded directly and the run is not consulted.
    """
    if artifact_uri is not None:
        click.echo(_download_artifact_from_uri(artifact_uri))
        return

    if run_id is None:
        # Neither source was supplied: report the problem and exit with a failure status.
        _logger.error(
            "Either ``--run-id`` or ``--artifact-uri`` must be provided.")
        sys.exit(1)

    if artifact_path is None:
        artifact_path = ""
    run = _get_store().get_run(run_id)
    repo = get_artifact_repository(run.info.artifact_uri)
    click.echo(repo.download_artifacts(artifact_path))
Пример #16
0
def start_run(run_id=None,
              experiment_id=None,
              run_name=None,
              nested=False,
              tags=None):
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_id`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_id`` takes precedence over ``MLFLOW_RUN_ID``.
    When the run ID is taken from the environment variable, the variable is removed from
    ``os.environ`` afterwards.

    If resuming an existing run, the run status is set to ``RunStatus.RUNNING``.

    MLflow sets a variety of default tags on the run, as defined in
    :ref:`MLflow system tags <system_tags>`.

    :param run_id: If specified, get the run with the specified UUID and log parameters
                   and metrics under that run. The run's status is set to running, but the
                   run's other attributes (``source_version``, ``source_type``, etc.) are
                   not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_id`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_NAME``
                          environment variable, ``MLFLOW_EXPERIMENT_ID`` environment variable,
                          or the default experiment as defined by the tracking server.
    :param run_name: Name of new run (stored as a ``mlflow.runName`` tag).
                     Used only when ``run_id`` is unspecified.
    :param nested: Controls whether run is nested in parent run. ``True`` creates a nested run.
    :param tags: An optional dictionary of string keys and values to set as tags on the new run.
                 The dictionary passed in is not modified.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.
    :raises Exception: If a run is already active and ``nested`` is not set.
    :raises MlflowException: If the run being resumed belongs to a different experiment than
                             the active one, or is in the deleted lifecycle stage.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Create nested runs
        with mlflow.start_run(run_name='PARENT_RUN') as parent_run:
            mlflow.log_param("parent", "yes")
            with mlflow.start_run(run_name='CHILD_RUN', nested=True) as child_run:
                mlflow.log_param("child", "yes")

        print("parent run_id: {}".format(parent_run.info.run_id))
        print("child run_id : {}".format(child_run.info.run_id))
        print("--")

        # Search all child runs with a parent id
        query = "tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
        results = mlflow.search_runs(filter_string=query)
        print(results[["run_id", "params.child", "tags.mlflow.runName"]])

    .. code-block:: text
        :caption: Output

        parent run_id: 5ec0e7ae18f54c2694ffb48c2fccf25c
        child run_id : 78b3b0d264b44cd29e8dc389749bb4be
        --
                                     run_id params.child tags.mlflow.runName
        0  78b3b0d264b44cd29e8dc389749bb4be          yes           CHILD_RUN
    """
    global _active_run_stack
    # back compat for int experiment_id
    if isinstance(experiment_id, int):
        experiment_id = str(experiment_id)

    if len(_active_run_stack) > 0 and not nested:
        raise Exception((
            "Run with UUID {} is already active. To start a new run, first end the "
            "current run with mlflow.end_run(). To start a nested "
            "run, call start_run with nested=True").format(
                _active_run_stack[0].info.run_id))

    # An explicit run_id wins over the environment variable; the variable is consumed
    # (deleted) once it has been read.
    if run_id:
        existing_run_id = run_id
    elif _RUN_ID_ENV_VAR in os.environ:
        existing_run_id = os.environ[_RUN_ID_ENV_VAR]
        del os.environ[_RUN_ID_ENV_VAR]
    else:
        existing_run_id = None

    if existing_run_id:
        _validate_run_id(existing_run_id)
        active_run_obj = MlflowClient().get_run(existing_run_id)
        # Check to see if experiment_id from environment matches experiment_id from set_experiment()
        if (_active_experiment_id is not None and
                _active_experiment_id != active_run_obj.info.experiment_id):
            raise MlflowException(
                "Cannot start run with ID {} because active run ID "
                "does not match environment run ID. Make sure --experiment-name "
                "or --experiment-id matches experiment set with "
                "set_experiment(), or just use command-line "
                "arguments".format(existing_run_id))
        # Check to see if current run isn't deleted
        if active_run_obj.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(existing_run_id))
        # Use previous end_time because a value is required for update_run_info
        end_time = active_run_obj.info.end_time
        _get_store().update_run_info(existing_run_id,
                                     run_status=RunStatus.RUNNING,
                                     end_time=end_time)
        # Re-fetch so the returned object reflects the updated run info.
        active_run_obj = MlflowClient().get_run(existing_run_id)
    else:
        # A new run started while another is active becomes a child of the innermost one.
        if len(_active_run_stack) > 0:
            parent_run_id = _active_run_stack[-1].info.run_id
        else:
            parent_run_id = None

        exp_id_for_run = experiment_id if experiment_id is not None else _get_experiment_id()

        # Work on a copy: the parent-run and run-name tags added below must not leak into
        # the dictionary owned by the caller.
        user_specified_tags = dict(tags) if tags else {}
        if parent_run_id is not None:
            user_specified_tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name is not None:
            user_specified_tags[MLFLOW_RUN_NAME] = run_name

        resolved_tags = context_registry.resolve_tags(user_specified_tags)

        active_run_obj = MlflowClient().create_run(
            experiment_id=exp_id_for_run, tags=resolved_tags)

    _active_run_stack.append(ActiveRun(active_run_obj))
    return _active_run_stack[-1]
Пример #17
0
def create(experiment_name, artifact_location):
    """
    Create an experiment in the configured tracking server.

    The experiment is created through the store returned by ``_get_store()``; both arguments
    are forwarded unchanged to ``store.create_experiment``.
    """
    store = _get_store()
    # Keep the value returned by the store (presumably the new experiment's ID -- confirm).
    exp_id = store.create_experiment(experiment_name, artifact_location)