def get_dependencies_from_conda_yaml(path):
    """Parse a conda environment YAML file into python / build / pip dependency groups.

    :param path: Path to a ``conda.yaml`` file.
    :return: dict with three keys:
             - ``python``: the pinned python version string (e.g. ``"3.8.13"``).
             - ``build_dependencies``: pip-style requirement strings built from the
               non-python conda dependencies, or ``None`` if there were none.
             - ``dependencies``: the list found under a ``{"pip": [...]}`` entry,
               or ``None`` if no pip section exists.
    :raises MlflowException: if the python dependency is unpinned, uses an
             unsupported version comparator (``<``, ``>``, ``!=``), is missing
             entirely, or a dependency entry is neither a string nor a
             ``{"pip": [...]}`` dict.
    """
    with open(path) as f:
        conda_env = yaml.safe_load(f)

    python = None
    build_dependencies = None
    # Conda dependency strings the regex can't parse; reported via a warning below.
    unmatched_dependencies = []
    dependencies = None
    for dep in conda_env.get("dependencies", []):
        if isinstance(dep, str):
            match = _CONDA_DEPENDENCY_REGEX.match(dep)
            if not match:
                unmatched_dependencies.append(dep)
                continue
            package = match.group("package")
            operator = match.group("operator")
            version = match.group("version")

            # Python: only the first "python" entry is honored, and it must be pinned.
            if not python and package == "python":
                if operator is None:
                    raise MlflowException.invalid_parameter_value(
                        f"Invalid dependency for python: {dep}. "
                        "It must be pinned (e.g. python=3.8.13)."
                    )
                # Fix: error message previously misspelled "comparator" as "comperator".
                if operator in ("<", ">", "!="):
                    raise MlflowException(
                        f"Invalid version comparator for python: '{operator}'. "
                        "Must be one of ['<=', '>=', '=', '=='].",
                        error_code=INVALID_PARAMETER_VALUE,
                    )
                python = version
                continue

            # Build packages: everything else becomes a pip-style requirement.
            if build_dependencies is None:
                build_dependencies = []
            # "=" is an invalid operator for pip; translate conda's "=" to pip's "==".
            operator = "==" if operator == "=" else operator
            build_dependencies.append(package + (operator or "") + (version or ""))
        elif _is_pip_deps(dep):
            dependencies = dep["pip"]
        else:
            raise MlflowException(
                f"Invalid conda dependency: {dep}. Must be str or dict in the form of "
                '{"pip": [...]}',
                error_code=INVALID_PARAMETER_VALUE,
            )

    if python is None:
        raise MlflowException(
            f"Could not extract python version from {path}",
            error_code=INVALID_PARAMETER_VALUE,
        )

    if unmatched_dependencies:
        _logger.warning(
            "The following conda dependencies will not be installed in the resulting "
            "environment: %s",
            unmatched_dependencies,
        )

    return dict(python=python, build_dependencies=build_dependencies, dependencies=dependencies)
def run(
    uri,
    entry_point="main",
    version=None,
    parameters=None,
    docker_args=None,
    experiment_name=None,
    experiment_id=None,
    backend="local",
    backend_config=None,
    use_conda=None,
    storage_dir=None,
    synchronous=True,
    run_id=None,
    run_name=None,
    env_manager=None,
):
    """
    Run an MLflow project. The project can be local or stored at a Git URI.

    MLflow provides built-in support for running projects locally or remotely on a Databricks or
    Kubernetes cluster. You can also run projects against other targets by installing an
    appropriate third-party plugin. See `Community Plugins <../plugins.html#community-plugins>`_
    for more information.

    For information on using this method in chained workflows, see `Building Multistep Workflows
    <../projects.html#building-multistep-workflows>`_.

    :raises: :py:class:`mlflow.exceptions.ExecutionException` If a run launched in blocking mode
             is unsuccessful.

    :param uri: URI of project to run. A local filesystem path
                or a Git repository URI (e.g. https://github.com/mlflow/mlflow-example)
                pointing to a project directory containing an MLproject file.
    :param entry_point: Entry point to run within the project. If no entry point with the
                        specified name is found, runs the project file ``entry_point`` as a
                        script, using "python" to run ``.py`` files and the default shell
                        (specified by environment variable ``$SHELL``) to run ``.sh`` files.
    :param version: For Git-based projects, either a commit hash or a branch name.
    :param parameters: Parameters (dictionary) for the entry point command.
    :param docker_args: Arguments (dictionary) for the docker command.
    :param experiment_name: Name of experiment under which to launch the run.
    :param experiment_id: ID of experiment under which to launch the run.
    :param backend: Execution backend for the run: MLflow provides built-in support for "local",
                    "databricks", and "kubernetes" (experimental) backends. If running against
                    Databricks, will run against a Databricks workspace determined as follows:
                    if a Databricks tracking URI of the form ``databricks://profile`` has been set
                    (e.g. by setting the MLFLOW_TRACKING_URI environment variable), will run
                    against the workspace specified by <profile>. Otherwise, runs against the
                    workspace specified by the default Databricks CLI profile.
    :param backend_config: A dictionary, or a path to a JSON file (must end in '.json'), which
                           will be passed as config to the backend. The exact content which
                           should be provided is different for each execution backend and is
                           documented at https://www.mlflow.org/docs/latest/projects.html.
    :param use_conda: This argument is deprecated. Use `env_manager='local'` instead.
                      If True (the default), create a new Conda environment for the run and
                      install project dependencies within that environment. Otherwise, run the
                      project in the current environment without installing any project
                      dependencies.
    :param storage_dir: Used only if ``backend`` is "local". MLflow downloads artifacts from
                        distributed URIs passed to parameters of type ``path`` to subdirectories
                        of ``storage_dir``.
    :param synchronous: Whether to block while waiting for a run to complete. Defaults to True.
                        Note that if ``synchronous`` is False and ``backend`` is "local", this
                        method will return, but the current process will block when exiting until
                        the local run completes. If the current process is interrupted, any
                        asynchronous runs launched via this method will be terminated. If
                        ``synchronous`` is True and the run fails, the current process will
                        error out as well.
    :param run_id: Note: this argument is used internally by the MLflow project APIs and should
                   not be specified. If specified, the run ID will be used instead of
                   creating a new run.
    :param run_name: The name to give the MLflow Run associated with the project execution.
                     If ``None``, the MLflow Run name is left unset.
    :param env_manager: Specify an environment manager to create a new environment for the run
                        and install project dependencies within that environment. The following
                        values are supported:

                        - local: use the local environment
                        - conda: use conda
                        - virtualenv: use virtualenv (and pyenv for Python version management)

                        If unspecified, default to conda.
    :return: :py:class:`mlflow.projects.SubmittedRun` exposing information (e.g. run ID)
             about the launched run.

    .. code-block:: python
        :caption: Example

        import mlflow

        project_uri = "https://github.com/mlflow/mlflow-example"
        params = {"alpha": 0.5, "l1_ratio": 0.01}

        # Run MLflow project and create a reproducible conda environment
        # on a local host
        mlflow.run(project_uri, parameters=params)

    .. code-block:: text
        :caption: Output

        ...
        ...
        Elasticnet model (alpha=0.500000, l1_ratio=0.010000):
        RMSE: 0.788347345611717
        MAE: 0.6155576449938276
        R2: 0.19729662005412607
        ...
        mlflow.projects: === Run (ID '6a5109febe5e4a549461e149590d0a7c') succeeded ===
    """
    backend_config_dict = backend_config if backend_config is not None else {}
    # A string path ending in ".json" is loaded as the backend config; use isinstance
    # rather than the original `type(...) != dict` comparison (idiomatic type check).
    if (
        backend_config
        and not isinstance(backend_config, dict)
        and os.path.splitext(backend_config)[-1] == ".json"
    ):
        with open(backend_config, "r") as handle:
            try:
                backend_config_dict = json.load(handle)
            except ValueError:
                _logger.error(
                    "Error when attempting to load and parse JSON cluster spec from file %s",
                    backend_config,
                )
                raise

    # `use_conda` is deprecated; translate it to the equivalent `env_manager` value,
    # rejecting the combination of both.
    if use_conda is not None and env_manager is not None:
        raise MlflowException.invalid_parameter_value(
            "`use_conda` cannot be used with `env_manager`"
        )
    elif use_conda is not None:
        warnings.warn(
            "`use_conda` is deprecated and will be removed in a future release. "
            "Use `env_manager=local` instead",
            FutureWarning,
            stacklevel=2,
        )
        env_manager = _EnvManager.CONDA if use_conda else _EnvManager.LOCAL
    elif env_manager is not None:
        _EnvManager.validate(env_manager)

    if backend == "databricks":
        mlflow.projects.databricks.before_run_validations(mlflow.get_tracking_uri(), backend_config)
    elif backend == "local" and run_id is not None:
        # Forward the caller-supplied run ID to the local backend via its config dict.
        backend_config_dict[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG] = run_id

    experiment_id = _resolve_experiment_id(
        experiment_name=experiment_name, experiment_id=experiment_id
    )

    submitted_run_obj = _run(
        uri=uri,
        experiment_id=experiment_id,
        entry_point=entry_point,
        version=version,
        parameters=parameters,
        docker_args=docker_args,
        backend_name=backend,
        backend_config=backend_config_dict,
        env_manager=env_manager,
        storage_dir=storage_dir,
        synchronous=synchronous,
        run_name=run_name,
    )
    if synchronous:
        _wait_for(submitted_run_obj)
    return submitted_run_obj
def test_invalid_parameter_value(self):
    """The invalid_parameter_value factory tags the exception with INVALID_PARAMETER_VALUE."""
    exc = MlflowException.invalid_parameter_value("test")
    assert exc.error_code == "INVALID_PARAMETER_VALUE"
def run(
    self, project_uri, entry_point, params, version, backend_config, tracking_uri, experiment_id
):
    """Execute a project entry point on the local machine.

    Fetches/validates the project, creates (or reuses) a tracking run, prepares the
    execution environment (docker, virtualenv, or conda depending on the project and
    ``backend_config``), then runs the entry point either in-process-blocking
    (synchronous) or via an ``mlflow run`` subprocess.

    :param project_uri: URI of the project to run (passed through to project fetching).
    :param entry_point: Name of the entry point to execute.
    :param params: Parameters (dict) for the entry point command.
    :param version: Project version (e.g. Git commit/branch) — forwarded to fetch/run.
    :param backend_config: Dict carrying backend settings; keys read here:
        MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG (optional), PROJECT_ENV_MANAGER,
        PROJECT_SYNCHRONOUS, PROJECT_DOCKER_ARGS, PROJECT_STORAGE_DIR.
    :param tracking_uri: Tracking URI (accepted but not referenced in this body —
        presumably part of the backend interface; confirm against the base class).
    :param experiment_id: ID of the experiment under which the run is launched.
    :return: Result of ``_run_entry_point`` (synchronous) or
        ``_invoke_mlflow_run_subprocess`` (asynchronous).
    """
    work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    project = load_project(work_dir)
    # Reuse a run ID supplied by the caller (e.g. the public `run` API), otherwise
    # let get_or_create_run create a fresh run.
    if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
        run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
    else:
        run_id = None
    active_run = get_or_create_run(
        run_id, project_uri, experiment_id, work_dir, version, entry_point, params
    )
    # Shell command fragments accumulated below (env activation, then entry point),
    # joined with `command_separator` at the end.
    command_args = []
    command_separator = " "
    env_manager = backend_config[PROJECT_ENV_MANAGER]
    synchronous = backend_config[PROJECT_SYNCHRONOUS]
    docker_args = backend_config[PROJECT_DOCKER_ARGS]
    storage_dir = backend_config[PROJECT_STORAGE_DIR]
    # Select an appropriate env manager for the project env type when none was given;
    # otherwise validate that the explicit choice is compatible with the project.
    if env_manager is None:
        env_manager = _env_type_to_env_manager(project.env_type)
    else:
        if project.env_type == env_type.PYTHON and env_manager == _EnvManager.CONDA:
            raise MlflowException.invalid_parameter_value(
                "python_env project cannot be executed using conda. Set `--env-manager` to "
                "'virtualenv' or 'local' to execute this project."
            )
    # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
    # environments, so the project will be executed inside a docker container.
    if project.docker_env:
        from mlflow.projects.docker import (
            validate_docker_env,
            validate_docker_installation,
            build_docker_image,
        )

        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        validate_docker_env(project)
        validate_docker_installation()
        image = build_docker_image(
            work_dir=work_dir,
            repository_uri=project.name,
            base_image=project.docker_env.get("image"),
            run_id=active_run.info.run_id,
        )
        command_args += _get_docker_command(
            image=image,
            active_run=active_run,
            docker_args=docker_args,
            volumes=project.docker_env.get("volumes"),
            user_env_vars=project.docker_env.get("environment"),
        )
    elif env_manager == _EnvManager.VIRTUALENV:
        tracking.MlflowClient().set_tag(
            active_run.info.run_id, MLFLOW_PROJECT_ENV, "virtualenv"
        )
        command_separator = " && "
        # A conda-typed project can still be run under virtualenv by translating its
        # conda.yaml into a python env spec.
        if project.env_type == env_type.CONDA:
            python_env = _PythonEnv.from_conda_yaml(project.env_config_path)
        else:
            python_env = _PythonEnv.from_yaml(project.env_config_path)
        python_bin_path = _install_python(python_env.python)
        env_root = _get_mlflow_virtualenv_root()
        work_dir_path = Path(work_dir)
        env_name = _get_virtualenv_name(python_env, work_dir_path)
        env_dir = Path(env_root).joinpath(env_name)
        activate_cmd = _create_virtualenv(work_dir_path, python_bin_path, env_dir, python_env)
        command_args += [activate_cmd]
    # Synchronously create a conda environment (even though this may take some time)
    # to avoid failures due to multiple concurrent attempts to create the same conda env.
    elif env_manager == _EnvManager.CONDA:
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
        command_separator = " && "
        conda_env_name = get_or_create_conda_env(project.env_config_path)
        command_args += get_conda_command(conda_env_name)
    # In synchronous mode, run the entry point command in a blocking fashion, sending status
    # updates to the tracking server when finished. Note that the run state may not be
    # persisted to the tracking server if interrupted.
    if synchronous:
        command_args += get_entry_point_command(project, entry_point, params, storage_dir)
        command_str = command_separator.join(command_args)
        return _run_entry_point(
            command_str, work_dir, experiment_id, run_id=active_run.info.run_id
        )
    # Otherwise, invoke `mlflow run` in a subprocess
    return _invoke_mlflow_run_subprocess(
        work_dir=work_dir,
        entry_point=entry_point,
        parameters=params,
        experiment_id=experiment_id,
        env_manager=env_manager,
        docker_args=docker_args,
        storage_dir=storage_dir,
        run_id=active_run.info.run_id,
    )