示例#1
0
def _get_docker_command(image,
                        active_run,
                        docker_args=None,
                        volumes=None,
                        user_env_vars=None):
    """
    Build the ``docker run`` argument list used to execute a run inside ``image``.

    :param image: Image object; ``image.tags[0]`` is appended as the image to run.
    :param active_run: Run whose ``info.run_id``, ``info.experiment_id`` and
                       ``info.artifact_uri`` are used to derive the environment variables
                       and the artifact-storage arguments.
    :param docker_args: Optional dict mapping a docker option name to its value. A value of
                        ``True`` emits the bare flag, ``False`` omits the flag entirely, and
                        any other value is emitted as ``<flag> <value>``. One-character
                        names get a ``-`` prefix, longer names a ``--`` prefix.
    :param volumes: Optional iterable of volume specifications, each passed with ``-v``.
    :param user_env_vars: Optional list whose entries are either a ``[name, value]`` list
                          (defines a new variable) or a name to copy from ``os.environ``.
    :return: The command as a list of arguments, ending with the image tag.
    :raises MlflowException: If a variable that must be copied from the system environment
                             is not set.
    """
    from mlflow.projects.docker import get_docker_tracking_cmd_and_envs

    docker_path = "docker"
    cmd = [docker_path, "run", "--rm"]

    if docker_args:
        for name, value in docker_args.items():
            flag = ("-" if len(name) == 1 else "--") + name
            if isinstance(value, bool):
                # Boolean switches: emit the bare flag when enabled. A disabled switch is
                # left out instead of leaking a literal ``False`` into the argument list.
                if value:
                    cmd.append(flag)
            else:
                # Passed name=value
                cmd += [flag, value]

    env_vars = get_run_env_vars(run_id=active_run.info.run_id,
                                experiment_id=active_run.info.experiment_id)
    tracking_uri = tracking.get_tracking_uri()
    tracking_cmds, tracking_envs = get_docker_tracking_cmd_and_envs(
        tracking_uri)
    artifact_cmds, artifact_envs = _get_docker_artifact_storage_cmd_and_envs(
        active_run.info.artifact_uri)

    cmd += tracking_cmds + artifact_cmds
    env_vars.update(tracking_envs)
    env_vars.update(artifact_envs)
    if user_env_vars is not None:
        for user_entry in user_env_vars:
            if isinstance(user_entry, list):
                # User has defined a new environment variable for the docker environment
                env_vars[user_entry[0]] = user_entry[1]
                continue
            # User wants to copy an environment variable from system environment
            system_var = os.environ.get(user_entry)
            if system_var is None:
                # Only the copy-from-system entries are "expected on the machine"; joining
                # the raw list would raise TypeError whenever it also holds [name, value]
                # pairs and hide this error from the user.
                expected_names = ", ".join(
                    str(entry) for entry in user_env_vars
                    if not isinstance(entry, list))
                raise MlflowException(
                    "This project expects the %s environment variables to "
                    "be set on the machine running the project, but %s was "
                    "not set. Please ensure all expected environment variables "
                    "are set" % (expected_names, user_entry))
            env_vars[user_entry] = system_var

    if volumes is not None:
        for v in volumes:
            cmd += ["-v", v]

    for key, value in env_vars.items():
        cmd += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd += [image.tags[0]]
    return cmd
示例#2
0
def _run_entry_point(command, work_dir, experiment_id, run_id):
    """
    Start an entry point command in a child process and wrap it in a ``LocalSubmittedRun``
    that can be used to query the run's status.

    :param command: Entry point command to run
    :param work_dir: Working directory in which to run the command
    :param experiment_id: MLflow experiment ID, used together with ``run_id`` to build the
                          run-specific environment variables for the child process.
    :param run_id: MLflow run ID associated with the entry point execution.
    """
    # Child environment: the current process environment, overlaid with the run variables
    # and then the Databricks variables for the current tracking URI.
    child_env = os.environ.copy()
    child_env.update(get_run_env_vars(run_id, experiment_id))
    databricks_env = get_databricks_env_vars(
        tracking_uri=mlflow.get_tracking_uri())
    child_env.update(databricks_env)

    _logger.info("=== Running command '%s' in run with ID '%s' === ", command,
                 run_id)

    # os.name == "nt" means Windows, where the command goes through ``cmd /c``;
    # everywhere else it is handed to ``bash -c``.
    shell_prefix = ["cmd", "/c"] if os.name == "nt" else ["bash", "-c"]
    process = subprocess.Popen(shell_prefix + [command],
                               close_fds=True,
                               cwd=work_dir,
                               env=child_env)
    return LocalSubmittedRun(run_id, process)
示例#3
0
def _invoke_mlflow_run_subprocess(work_dir, entry_point, parameters,
                                  experiment_id, use_conda, storage_dir,
                                  run_id):
    """
    Launch an MLflow project asynchronously through an ``mlflow run`` subprocess.

    The command is built from ``work_dir`` (passed as the project URI), the entry point,
    its parameters and the conda/storage settings. The subprocess is given the run-specific
    environment variables plus the Databricks variables for the current tracking URI.

    :return: A ``LocalSubmittedRun`` wrapping the subprocess, usable to query run status.
    """
    _logger.info("=== Asynchronously launching MLflow run with ID %s ===",
                 run_id)
    run_cmd = _build_mlflow_run_cmd(
        uri=work_dir,
        entry_point=entry_point,
        storage_dir=storage_dir,
        use_conda=use_conda,
        run_id=run_id,
        parameters=parameters,
    )
    child_env = get_run_env_vars(run_id, experiment_id)
    databricks_env = get_databricks_env_vars(mlflow.get_tracking_uri())
    child_env.update(databricks_env)
    return LocalSubmittedRun(run_id, _run_mlflow_run_cmd(run_cmd, child_env))
示例#4
0
def _run(
    uri,
    experiment_id,
    entry_point,
    version,
    parameters,
    docker_args,
    backend_name,
    backend_config,
    use_conda,
    storage_dir,
    synchronous,
):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.

    ``backend_config`` is updated in place with the ``use_conda``, ``synchronous``,
    ``docker_args`` and ``storage_dir`` settings before being handed to the backend.

    :raises ExecutionException: If ``backend_name`` is neither ``"databricks"`` nor
                                ``"kubernetes"`` and no backend can be loaded for it.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    backend_config[PROJECT_USE_CONDA] = use_conda
    backend_config[PROJECT_SYNCHRONOUS] = synchronous
    backend_config[PROJECT_DOCKER_ARGS] = docker_args
    backend_config[PROJECT_STORAGE_DIR] = storage_dir
    # TODO: remove this check once kubernetes execution has been refactored
    if backend_name not in {"databricks", "kubernetes"}:
        backend = loader.load_backend(backend_name)
        if not backend:
            # Fail fast: nothing below can handle this backend, so do not fetch the
            # project or get/create a run only to reject the backend afterwards.
            supported_backends = ["databricks", "kubernetes"] + list(
                loader.MLFLOW_BACKENDS.keys())
            raise ExecutionException(
                "Got unsupported execution mode %s. Supported "
                "values: %s" % (backend_name, supported_backends))
        submitted_run = backend.run(
            uri,
            entry_point,
            parameters,
            version,
            backend_config,
            tracking_store_uri,
            experiment_id,
        )
        tracking.MlflowClient().set_tag(submitted_run.run_id,
                                        MLFLOW_PROJECT_BACKEND,
                                        backend_name)
        return submitted_run

    # From here on backend_name is either "databricks" or "kubernetes".
    work_dir = fetch_and_validate_project(uri, version, entry_point,
                                          parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)

    active_run = get_or_create_run(None, uri, experiment_id, work_dir, version,
                                   entry_point, parameters)

    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "databricks")
        from mlflow.projects.databricks import run_databricks

        return run_databricks(
            remote_run=active_run,
            uri=uri,
            entry_point=entry_point,
            work_dir=work_dir,
            parameters=parameters,
            experiment_id=experiment_id,
            cluster_spec=backend_config,
        )

    # Kubernetes: build the project's docker image, push it to the registry and submit
    # a job that runs the entry point command.
    from mlflow.projects.docker import (
        build_docker_image,
        validate_docker_env,
        validate_docker_installation,
    )
    from mlflow.projects import kubernetes as kb

    tracking.MlflowClient().set_tag(active_run.info.run_id,
                                    MLFLOW_PROJECT_ENV, "docker")
    tracking.MlflowClient().set_tag(active_run.info.run_id,
                                    MLFLOW_PROJECT_BACKEND, "kubernetes")
    validate_docker_env(project)
    validate_docker_installation()
    kube_config = _parse_kubernetes_config(backend_config)
    image = build_docker_image(
        work_dir=work_dir,
        repository_uri=kube_config["repository-uri"],
        base_image=project.docker_env.get("image"),
        run_id=active_run.info.run_id,
    )
    image_digest = kb.push_image_to_registry(image.tags[0])
    submitted_run = kb.run_kubernetes_job(
        project.name,
        active_run,
        image.tags[0],
        image_digest,
        get_entry_point_command(project, entry_point, parameters,
                                storage_dir),
        # Use run_id here as everywhere else in this function (was run_uuid).
        get_run_env_vars(run_id=active_run.info.run_id,
                         experiment_id=active_run.info.experiment_id),
        kube_config.get("kube-context", None),
        kube_config["kube-job-template"],
    )
    return submitted_run