# Example 1
def _maybe_set_run_terminated(active_run, status):
    """If the passed-in active run is defined and still running (i.e. hasn't
    already been terminated within user code), mark it as terminated with the
    passed-in status."""
    if active_run is None:
        return
    run_id = active_run.info.run_id
    cur_status = tracking.MlflowClient().get_run(run_id).info.status
    if RunStatus.is_terminated(cur_status):
        return
    tracking.MlflowClient().set_terminated(run_id, status)
# Example 2
def _create_run(uri, experiment_id, work_dir, entry_point):
    """Create a ``Run`` against the current MLflow tracking server, logging
    metadata (e.g. the URI, entry point, and parameters of the project) about
    the run.

    Return an ``ActiveRun`` that can be used to report additional data about
    the run (metrics/params) to the tracking server.
    """
    # Resolve the source name; local URIs may resolve to a git remote URL.
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(
            _expand_uri(uri))
    else:
        source_name = _expand_uri(uri)

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point
    }

    # Tag the git commit of the working directory, when one is available.
    source_version = _get_git_commit(work_dir)
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version

    # Nest the new run under any run already active in this process.
    existing_run = fluent.active_run()
    if existing_run:
        tags[MLFLOW_PARENT_RUN_ID] = existing_run.info.run_id

    return tracking.MlflowClient().create_run(experiment_id=experiment_id,
                                              tags=tags)
# Example 3
def _resolve_experiment_id(experiment_name=None, experiment_id=None):
    """Resolve experiment.

    Verifies either one or other is specified - cannot be both selected.

    If ``experiment_name`` is provided and does not exist, an experiment
    of that name is created and its id is returned.

    :param experiment_name: Name of experiment under which to launch the run.
    :param experiment_id: ID of experiment under which to launch the run.
    :return: str
    """

    if experiment_name and experiment_id:
        raise MlflowException(
            "Specify only one of 'experiment_name' or 'experiment_id'.")

    if experiment_id:
        return str(experiment_id)

    if experiment_name:
        client = tracking.MlflowClient()
        exp = client.get_experiment_by_name(experiment_name)
        if exp:
            return exp.experiment_id
        else:
            print(
                "INFO: '{}' does not exist. Creating a new experiment".format(
                    experiment_name))
            return client.create_experiment(experiment_name)

    return _get_experiment_id()
# Example 4
def _wait_for(submitted_run_obj):
    """Block until the passed-in submitted run finishes, reporting its final
    status to the tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the
    # tracking server if we're interrupted before we reach the try block below
    try:
        if run_id is not None:
            active_run = tracking.MlflowClient().get_run(run_id)
        succeeded = submitted_run_obj.wait()
        if not succeeded:
            _maybe_set_run_terminated(active_run, 'FAILED')
            raise ExecutionException("Run (ID '%s') failed" % run_id)
        _logger.info("=== Run (ID '%s') succeeded ===", run_id)
        _maybe_set_run_terminated(active_run, 'FINISHED')
    except KeyboardInterrupt:
        # Best-effort cancellation: stop the run, mark it FAILED, re-raise.
        _logger.error("=== Run (ID '%s') interrupted, cancelling run ===",
                      run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, 'FAILED')
        raise
# Example 5
def _run(uri,
         experiment_id,
         entry_point='main',
         version=None,
         parameters=None,
         backend=None,
         backend_config=None,
         use_conda=True,
         storage_dir=None,
         synchronous=True,
         run_id=None):
    """Helper that delegates to the project-running method corresponding to the
    passed-in backend.

    :param uri: URI of the project to run (local path or remote location).
    :param experiment_id: ID of the experiment to launch the run under.
    :param entry_point: Name of the project entry point to execute.
    :param version: Project version (e.g. a git branch/commit) to fetch.
    :param parameters: Dict of parameters for the entry point; defaults to {}.
    :param backend: Execution backend; only 'local' (or None) is supported.
    :param backend_config: Backend-specific config (currently unused; kept
        for interface compatibility).
    :param use_conda: Forwarded to the asynchronous subprocess runner only.
    :param storage_dir: Directory used when computing entry-point commands.
    :param synchronous: If True, run the entry point in a blocking fashion.
    :param run_id: Existing run ID to reuse instead of creating a new run.
    :return: A ``SubmittedRun`` corresponding to the project run.
    :raises ExecutionException: if ``backend`` is an unsupported mode.
    """

    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, backend)  # noqa
    project.get_entry_point(entry_point)._validate_parameters(parameters)

    # One client for all tracking calls in this function.
    client = tracking.MlflowClient()
    if run_id:
        active_run = client.get_run(run_id)
    else:
        active_run = _create_run(uri, experiment_id, work_dir, entry_point)
    active_run_id = active_run.info.run_id

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded
    # local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) +
                       list(extra_params.items())):
        client.log_param(active_run_id, key, value)

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        for tag in [MLFLOW_GIT_REPO_URL, LEGACY_MLFLOW_GIT_REPO_URL]:
            client.set_tag(active_run_id, tag, repo_url)

    # Add branch name tag if a branch is specified through --version
    if _is_valid_branch_name(work_dir, version):
        for tag in [MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_BRANCH_NAME]:
            client.set_tag(active_run_id, tag, version)

    if backend == 'local' or backend is None:
        client.set_tag(active_run_id, MLFLOW_PROJECT_BACKEND, 'local')
        command_args = []
        command_separator = ' '
        # NOTE(review): the docker and conda execution paths (and a
        # kubernetes backend) were disabled here; even when the project
        # declares a docker_env, the project runs in the current environment.
        # `use_conda` only affects the asynchronous subprocess below.
        if synchronous:
            # In synchronous mode, run the entry point command in a blocking
            # fashion, sending status updates to the tracking server when
            # finished. Note that the run state may not be persisted to the
            # tracking server if interrupted.
            command_args += _get_entry_point_command(project, entry_point,
                                                     parameters, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str,
                                    work_dir,
                                    experiment_id,
                                    run_id=active_run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=parameters,
                                             experiment_id=experiment_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run_id)

    supported_backends = ['local', 'kubernetes']
    raise ExecutionException('Got unsupported execution mode %s. Supported '
                             'values: %s' % (backend, supported_backends))