def run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                   git_username, git_password):
    """
    Run the project at the specified URI as a Databricks Job.

    Returns a `SubmittedRun` that can be used to query the run's status or wait
    for the resulting Databricks Job run to terminate.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)
    work_dir = _fetch_and_clean_project(
        uri=uri, version=version, git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    # Validate user-supplied parameters before doing any remote work.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)
    remote_run = tracking._create_run(
        experiment_id=experiment_id,
        source_name=_expand_uri(uri),
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    # Environment variables propagated to the remote run so it reports back to
    # the same tracking server / experiment.
    env_vars = {
        tracking._TRACKING_URI_ENV_VAR: tracking_uri,
        tracking._EXPERIMENT_ID_ENV_VAR: experiment_id,
    }
    run_id = remote_run.run_info.run_uuid
    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))
    # Launch run on Databricks. `cluster_spec` is a path to a JSON file here;
    # parse it into a dict before handing it to the job-submission helper.
    with open(cluster_spec, 'r') as spec_file:
        try:
            parsed_cluster_spec = json.load(spec_file)
        except ValueError:
            eprint(
                "Error when attempting to load and parse JSON cluster spec from file "
                "%s. " % cluster_spec)
            raise
    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, parsed_cluster_spec)
    return DatabricksSubmittedRun(db_run_id, run_id)
def test_load_project(tmpdir, mlproject, conda_env_path, conda_env_contents, mlproject_path):
    """
    Test that we can load a project with various combinations of an MLproject / conda.yaml file

    :param mlproject: Contents of MLproject file. If None, no MLproject file will be written
    :param conda_env_path: Path to conda environment file. If None, no conda environment file
                           will be written.
    :param conda_env_contents: Contents of conda environment file (written if conda_env_path
                               is not None)
    """
    if mlproject:
        tmpdir.join(mlproject_path).write(mlproject)
    if conda_env_path:
        tmpdir.join(conda_env_path).write(conda_env_contents)
    project = _project_spec.load_project(tmpdir.strpath)
    assert project._entry_points == {}
    expected_env_path = (
        os.path.abspath(os.path.join(tmpdir.strpath, conda_env_path))
        if conda_env_path else None
    )
    assert project.env_config_path == expected_env_path
    if conda_env_path:
        # Use a context manager so the file handle is closed deterministically
        # rather than leaked until garbage collection.
        with open(project.env_config_path) as env_file:
            assert env_file.read() == conda_env_contents
def _run(uri, experiment_id, entry_point="main", version=None, parameters=None,
         backend=None, backend_config=None, use_conda=True,
         storage_dir=None, synchronous=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run (local path or git URI).
    :param experiment_id: ID of the experiment under which to launch the run.
    :param entry_point: Entry point to run within the project.
    :param version: Project version (e.g. a git commit) to run, if any.
    :param parameters: Dict of parameters for the entry point.
    :param backend: Execution backend: "local" (or None), "databricks", or "kubernetes".
    :param backend_config: Backend-specific configuration (e.g. cluster spec path for
        Databricks, kubernetes job config for Kubernetes).
    :param use_conda: If True (and no docker env is configured), run inside a conda env.
    :param storage_dir: Directory used by the entry point for downloaded artifacts.
    :param synchronous: If True, run the local entry point in a blocking fashion.
    :param run_id: If given, reuse this existing run instead of creating a new one.
    :raises ExecutionException: if ``backend`` is not a supported execution mode.
    """
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, backend)
    # Validate parameters early, before creating any tracking-server state.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, experiment_id, work_dir, entry_point)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(parameters,
                                                                    storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_id, key, value)
    # Record git metadata as run tags (both current and legacy tag names are set).
    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        for tag in [MLFLOW_GIT_REPO_URL, LEGACY_MLFLOW_GIT_REPO_URL]:
            tracking.MlflowClient().set_tag(active_run.info.run_id, tag, repo_url)
    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        for tag in [MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_BRANCH_NAME]:
            tracking.MlflowClient().set_tag(active_run.info.run_id, tag, version)
    if backend == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "databricks")
        # Imported lazily to avoid a hard dependency on databricks modules for
        # local-only usage.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run, uri=uri, entry_point=entry_point, work_dir=work_dir,
            parameters=parameters, experiment_id=experiment_id, cluster_spec=backend_config)
    elif backend == "local" or backend is None:
        command = []
        command_separator = " "
        # If a docker_env attribute is defined in MLproject then it takes precedence over
        # conda yaml environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV,
                                            "docker")
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                            "local")
            _validate_docker_env(project)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir,
                                        repository_uri=project.name,
                                        base_image=project.docker_env.get('image'),
                                        run_id=active_run.info.run_id)
            command += _get_docker_command(image=image, active_run=active_run)
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV,
                                            "conda")
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                            "local")
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command += _get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending
        # status updates to the tracking server when finished. Note that the run state may
        # not be persisted to the tracking server if interrupted
        if synchronous:
            command += _get_entry_point_command(project, entry_point, parameters, storage_dir)
            command = command_separator.join(command)
            return _run_entry_point(command, work_dir, experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=experiment_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.info.run_id)
    elif backend == "kubernetes":
        from mlflow.projects import kubernetes as kb
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "kubernetes")
        _validate_docker_env(project)
        _validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        image = _build_docker_image(work_dir=work_dir,
                                    repository_uri=kube_config["repository-uri"],
                                    base_image=project.docker_env.get('image'),
                                    run_id=active_run.info.run_id)
        image_digest = kb.push_image_to_registry(image.tags[0])
        submitted_run = kb.run_kubernetes_job(
            project.name, active_run, image.tags[0], image_digest,
            _get_entry_point_command(project, entry_point, parameters, storage_dir),
            _get_run_env_vars(run_id=active_run.info.run_uuid,
                              experiment_id=active_run.info.experiment_id),
            kube_config['kube-context'], kube_config['kube-job-template'])
        return submitted_run
    supported_backends = ["local", "databricks", "kubernetes"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend, supported_backends))
def load_project():
    """
    Load an example project for use in tests, returning an in-memory `Project` object.
    """
    example_project = _project_spec.load_project(TEST_PROJECT_DIR)
    return example_project
def test_docker_invalid_project_backend_local():
    """Validation should reject a docker project whose name is missing."""
    project = _project_spec.load_project("./examples/docker")
    project.name = None
    with pytest.raises(ExecutionException):
        mlflow.projects.docker.validate_docker_env(project)
def test_docker_valid_project_backend_local():
    """Validation should accept the example docker project as-is."""
    project = _project_spec.load_project("./examples/docker")
    mlflow.projects.docker.validate_docker_env(project)
def test_load_invalid_project(tmpdir, invalid_project_contents, expected_error_msg):
    """Loading an invalid MLproject file should raise with the expected message."""
    tmpdir.join("MLproject").write(invalid_project_contents)
    # Don't pass `match=expected_error_msg` to pytest.raises: `match` interprets
    # its argument as a regex, so an expected message containing regex
    # metacharacters (e.g. '(', '[') could fail to match spuriously. A literal
    # substring check on the raised exception is both sufficient and exact.
    with pytest.raises(ExecutionException) as exc_info:
        _project_spec.load_project(tmpdir.strpath)
    assert expected_error_msg in str(exc_info.value)
def _run(uri, entry_point="main", version=None, parameters=None,
         experiment_id=None, mode=None, cluster_spec=None,
         git_username=None, git_password=None, use_conda=True,
         storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run (local path or git URI).
    :param entry_point: Entry point to run within the project.
    :param version: Project version (e.g. a git commit) to run, if any.
    :param parameters: Dict of parameters for the entry point.
    :param experiment_id: Experiment under which to launch the run; falls back to the
        default experiment if not given.
    :param mode: Execution mode: "local" (or None) or "databricks".
    :param cluster_spec: Databricks cluster specification (used only in databricks mode).
    :param git_username: Username for fetching the project from a git remote, if needed.
    :param git_password: Password for fetching the project from a git remote, if needed.
    :param use_conda: If True, run the local entry point inside a conda environment.
    :param storage_dir: Directory used by the entry point for downloaded artifacts.
    :param block: If True, run the local entry point in a blocking fashion.
    :param run_id: If given, reuse this existing run instead of creating a new one.
    :raises ExecutionException: if ``mode`` is not a supported execution mode.
    """
    exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    # Validate parameters early, before touching the tracking server.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = tracking.get_service().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.get_service().log_param(active_run.info.run_uuid, key, value)
    if mode == "databricks":
        # Imported lazily so local-only usage doesn't require databricks modules.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run, uri=uri, entry_point=entry_point,
                              work_dir=work_dir, parameters=parameters,
                              experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        # Synchronously create a conda environment (even though this may take some time) to
        # avoid failures due to multiple concurrent attempts to create the same conda env.
        conda_env_name = _get_or_create_conda_env(
            project.conda_env_path) if use_conda else None
        # In blocking mode, run the entry point command in blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if block:
            command = _get_entry_point_command(project, entry_point, parameters,
                                               conda_env_name, storage_dir)
            return _run_entry_point(command, work_dir, exp_id,
                                    run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir, entry_point=entry_point,
                                             parameters=parameters, experiment_id=exp_id,
                                             use_conda=use_conda, storage_dir=storage_dir,
                                             run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def _run(uri, entry_point="main", version=None, parameters=None,
         experiment_name=None, experiment_id=None, mode=None, cluster_spec=None,
         git_username=None, git_password=None, use_conda=True, storage_dir=None,
         block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run (local path or git URI).
    :param entry_point: Entry point to run within the project.
    :param version: Project version (e.g. a git commit) to run, if any.
    :param parameters: Dict of parameters for the entry point.
    :param experiment_name: Name of the experiment to launch the run under; takes
        precedence over ``experiment_id`` when set.
    :param experiment_id: Experiment ID to launch the run under; falls back to the
        default experiment if neither name nor ID is given.
    :param mode: Execution mode: "local" (or None) or "databricks".
    :param cluster_spec: Databricks cluster specification (used only in databricks mode).
    :param git_username: Username for fetching the project from a git remote, if needed.
    :param git_password: Password for fetching the project from a git remote, if needed.
    :param use_conda: If True (and no docker env is configured), run inside a conda env.
    :param storage_dir: Directory used by the entry point for downloaded artifacts.
    :param block: If True, run the local entry point in a blocking fashion.
    :param run_id: If given, reuse this existing run instead of creating a new one.
    :raises ExecutionException: if ``mode`` is not a supported execution mode.
    """
    if mode == "databricks":
        # Validate databricks prerequisites before any fetching/run creation.
        mlflow.projects.databricks.before_run_validations(
            mlflow.get_tracking_uri(), cluster_spec)
    if experiment_name:
        # NOTE(review): get_experiment_by_name returns an Experiment object (or None),
        # not an experiment ID — verify downstream consumers of exp_id expect this.
        exp_id = tracking.MlflowClient().get_experiment_by_name(
            experiment_name)
    else:
        exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, mode)
    # Validate parameters early, before creating any tracking-server state.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_uuid, key, value)
    # Record git metadata as run tags.
    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_GIT_REPO_URL,
                                        repo_url)
    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_GIT_BRANCH_NAME,
                                        version)
    if mode == "databricks":
        # Imported lazily so local-only usage doesn't require databricks modules.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run, uri=uri, entry_point=entry_point,
                              work_dir=work_dir, parameters=parameters, experiment_id=exp_id,
                              cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        command = []
        command_separator = " "
        # If a docker_env attribute is defined in MLProject then it takes precedence over
        # conda yaml environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV,
                                            MLFLOW_DOCKER)
            _validate_docker_env(project.docker_env)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir, project=project,
                                        active_run=active_run)
            command += _get_docker_command(image=image, active_run=active_run)
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV,
                                            MLFLOW_CONDA)
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command += _get_conda_command(conda_env_name)
        # In blocking mode, run the entry point command in blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if block:
            command += _get_entry_point_command(project, entry_point, parameters, storage_dir)
            command = command_separator.join(command)
            return _run_entry_point(command, work_dir, exp_id,
                                    run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir, entry_point=entry_point,
                                             parameters=parameters, experiment_id=exp_id,
                                             use_conda=use_conda, storage_dir=storage_dir,
                                             run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def load_project(work_dir):
    """Load and return the project specification found in ``work_dir``."""
    loaded = _project_spec.load_project(work_dir)
    return loaded
def fetch_and_validate_project(uri, version, entry_point, parameters):
    """
    Fetch the project at ``uri`` and validate ``parameters`` against ``entry_point``.

    Returns the local working directory containing the fetched project.
    """
    params = parameters or {}
    work_dir = _fetch_project(uri=uri, version=version)
    fetched_project = _project_spec.load_project(work_dir)
    # Raises if the supplied parameters don't satisfy the entry point's spec.
    fetched_project.get_entry_point(entry_point)._validate_parameters(params)
    return work_dir
def load_project():
    """Return the test project loaded from ``TEST_PROJECT_DIR``."""
    test_project = _project_spec.load_project(directory=TEST_PROJECT_DIR)
    return test_project