Example No. 1
    def run(self, project_uri, entry_point, params, version, backend_config,
            tracking_uri, experiment_id):
        work_dir = fetch_and_validate_project(project_uri, version,
                                              entry_point, params)
        active_run = get_or_create_run(None, project_uri, experiment_id,
                                       work_dir, version, entry_point, params)
        return DummySubmittedRun(active_run.info.run_id)
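
For context, a minimal sketch of how a backend like this gets wired in: MLflow resolves third-party project backends from the "mlflow.project_backend" setuptools entry-point group, loading a zero-argument callable (here, the class itself) that returns the backend instance. The distribution and module names below are hypothetical.

from setuptools import setup

# setup.py of a hypothetical plugin distribution exposing the backend above;
# mlflow.projects.backend.loader resolves names registered under
# "mlflow.project_backend" and calls the loaded object to build the backend
setup(
    name="mlflow-dummy-backend",  # hypothetical package name
    packages=["mlflow_dummy_backend"],
    entry_points={
        "mlflow.project_backend": [
            "dummy=mlflow_dummy_backend:DummyBackend",
        ],
    },
)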
Example No. 2
    def run(self, project_uri, entry_point, params, version, backend_config,
            tracking_uri, experiment_id):
        # Workaround: the positional-argument fix did not ship in mlflow <= 1.10.0,
        # so tracking_uri and experiment_id may arrive swapped; see
        # https://github.com/mlflow/mlflow/issues/3138
        if _AZUREML_URI not in tracking_uri and _AZUREML_URI in experiment_id:
            tracking_uri, experiment_id = experiment_id, tracking_uri

        # The use_conda value from the mlflow.projects.run call is only propagated to
        # backend_config in releases after 1.10.0, so if the key is missing, assume True.
        # If the user set backend="azureml" without passing any backend_config, assume a
        # local run in the current conda environment (i.e. use_conda = False).
        if backend_config is None:
            backend_config = {PROJECT_USE_CONDA: False}
        elif PROJECT_USE_CONDA not in backend_config:
            backend_config[PROJECT_USE_CONDA] = True
        use_conda = backend_config[PROJECT_USE_CONDA]
        stream_output = backend_config.get(STREAM_OUTPUT, True)
        compute = backend_config.get(COMPUTE)

        try:
            work_dir = fetch_and_validate_project(project_uri, version,
                                                  entry_point, params)
            mlproject = load_project(work_dir)
        except ExecutionException as e:
            # re-raise with the original message, chaining the cause for a full traceback
            raise ExecutionException(str(e)) from e
        # process mlflow parameters into a format usable for AzureML ScriptRunConfig
        command_args = []
        command_args += get_entry_point_command(mlproject, entry_point, params,
                                                None)

        # components for launching an AzureML ScriptRun
        workspace = load_azure_workspace()
        experiment = _load_azure_experiment(workspace, experiment_id)

        # TODO: the mlflow system tag mlflow.source.name is null after the switch from
        # (script, args) to command
        src = ScriptRunConfig(source_directory=work_dir, command=command_args)

        # Submit to remote compute unless the customer set the target to local
        if compute and compute != _LOCAL and compute != _LOCAL.upper():
            remote_environment = _load_remote_environment(mlproject)
            registered_env = remote_environment.register(workspace=workspace)
            cpu_cluster = _load_compute_target(workspace, backend_config)
            src.run_config.target = cpu_cluster.name
            src.run_config.environment = registered_env
        else:
            local_environment = _load_local_environment(mlproject, use_conda)
            src.run_config.environment = local_environment
        submitted_run = experiment.submit(config=src)
        _logger.info(
            _CONSOLE_MSG.format(
                "AzureML-Mlflow {} Experiment submitted".format(
                    experiment.name)))
        return AzureMLSubmittedRun(submitted_run, stream_output)
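
A caller-side sketch for this backend, assuming the plugin registers it under the name "azureml"; the literal key strings are assumptions standing in for the plugin's COMPUTE, USE_CONDA, and STREAM_OUTPUT constants read above.

import mlflow

# Sketch: submit a project to an AzureML compute target; key strings are
# assumed to match the plugin's backend_config constants
submitted_run = mlflow.projects.run(
    uri=".",
    backend="azureml",
    backend_config={
        "COMPUTE": "cpu-cluster",  # name of an existing AzureML compute target
        "USE_CONDA": False,
        "STREAM_OUTPUT": True,
    },
)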
Example No. 3
def test_fetch_create_and_log(tmpdir):
    entry_point_name = "entry_point"
    parameters = {
        "method_name": "string",
    }
    entry_point = _project_spec.EntryPoint(entry_point_name, parameters,
                                           "run_model.sh")
    mock_fetched_project = _project_spec.Project(
        env_type="local",
        env_config_path=None,
        entry_points={entry_point_name: entry_point},
        docker_env=None,
        name="my_project",
    )
    experiment_id = mlflow.create_experiment("test_fetch_project")
    expected_dir = tmpdir
    project_uri = "http://someuri/myproject.git"
    user_param = {"method_name": "newton"}
    with mock.patch("mlflow.projects.utils._fetch_project",
                    return_value=expected_dir):
        with mock.patch("mlflow.projects._project_spec.load_project",
                        return_value=mock_fetched_project):
            work_dir = fetch_and_validate_project("", "", entry_point_name,
                                                  user_param)
            project = load_project(work_dir)
            assert mock_fetched_project == project
            assert expected_dir == work_dir
            # Create a run
            active_run = get_or_create_run(
                run_id=None,
                uri=project_uri,
                experiment_id=experiment_id,
                work_dir=work_dir,
                version=None,
                entry_point=entry_point_name,
                parameters=user_param,
            )

            # check tags
            run = mlflow.get_run(active_run.info.run_id)
            assert MLFLOW_PROJECT_ENTRY_POINT in run.data.tags
            assert MLFLOW_SOURCE_NAME in run.data.tags
            assert entry_point_name == run.data.tags[
                MLFLOW_PROJECT_ENTRY_POINT]
            assert project_uri == run.data.tags[MLFLOW_SOURCE_NAME]
            assert user_param == run.data.params
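
For reference, a sketch of the imports the test above relies on; the module paths match the mlflow version the snippet targets and may have moved since.

from unittest import mock

import mlflow
from mlflow.projects import _project_spec
from mlflow.projects.utils import (
    fetch_and_validate_project,
    get_or_create_run,
    load_project,
)
from mlflow.utils.mlflow_tags import MLFLOW_PROJECT_ENTRY_POINT, MLFLOW_SOURCE_NAME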
Example No. 4
    def run(self, project_uri, entry_point, params, version, backend_config,
            tracking_uri, experiment_id):
        work_dir = fetch_and_validate_project(project_uri, version,
                                              entry_point, params)
        project = load_project(work_dir)
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
            run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        else:
            run_id = None
        active_run = get_or_create_run(run_id, project_uri, experiment_id,
                                       work_dir, version, entry_point, params)
        command_args = []
        command_separator = " "
        use_conda = backend_config[PROJECT_USE_CONDA]
        synchronous = backend_config[PROJECT_SYNCHRONOUS]
        docker_args = backend_config[PROJECT_DOCKER_ARGS]
        storage_dir = backend_config[PROJECT_STORAGE_DIR]
        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "docker")
            validate_docker_env(project)
            validate_docker_installation()
            image = build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get('image'),
                run_id=active_run.info.run_id)
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"))
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = get_or_create_conda_env(project.conda_env_path)
            command_args += get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += get_entry_point_command(project, entry_point,
                                                    params, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str,
                                    work_dir,
                                    experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=params,
                                             experiment_id=experiment_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run.info.run_id)
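
This is the backend an ordinary local run dispatches to. A minimal caller-side sketch: mlflow.projects.run fills in the PROJECT_* keys of backend_config before invoking the run() method above.

import mlflow

# Sketch: a synchronous local run of the public mlflow-example project;
# mlflow.projects.run populates PROJECT_USE_CONDA, PROJECT_SYNCHRONOUS,
# PROJECT_DOCKER_ARGS and PROJECT_STORAGE_DIR before the backend sees them
submitted_run = mlflow.projects.run(
    uri="https://github.com/mlflow/mlflow-example.git",
    parameters={"alpha": "0.5"},
    backend="local",
    synchronous=True,
)
print(submitted_run.run_id)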
Example No. 5
def _run(uri,
         experiment_id,
         entry_point="main",
         version=None,
         parameters=None,
         docker_args=None,
         backend_name=None,
         backend_config=None,
         use_conda=True,
         storage_dir=None,
         synchronous=True):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    if backend_name:
        backend = loader.load_backend(backend_name)
        if backend:
            # NOTE: experiment_id and tracking_store_uri are passed in swapped
            # order relative to the backend signature; this is the positional-
            # argument bug worked around in Example No. 2 (mlflow/mlflow#3138)
            submitted_run = backend.run(uri, entry_point, parameters, version,
                                        backend_config, experiment_id,
                                        tracking_store_uri)
            tracking.MlflowClient().set_tag(submitted_run.run_id,
                                            MLFLOW_PROJECT_BACKEND,
                                            backend_name)
            return submitted_run

    work_dir = fetch_and_validate_project(uri, version, entry_point,
                                          parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)

    existing_run_id = None
    if backend_name == "local" and _MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
        existing_run_id = backend_config[_MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
    active_run = get_or_create_run(existing_run_id, uri, experiment_id,
                                   work_dir, version, entry_point, parameters)

    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "databricks")
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run,
                              uri=uri,
                              entry_point=entry_point,
                              work_dir=work_dir,
                              parameters=parameters,
                              experiment_id=experiment_id,
                              cluster_spec=backend_config)

    elif backend_name == "local" or backend_name is None:
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "local")
        command_args = []
        command_separator = " "
        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "docker")
            _validate_docker_env(project)
            _validate_docker_installation()
            image = _build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get('image'),
                run_id=active_run.info.run_id)
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"))
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command_args += _get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += _get_entry_point_command(project, entry_point,
                                                     parameters, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str,
                                    work_dir,
                                    experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=parameters,
                                             experiment_id=experiment_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run.info.run_id)
    elif backend_name == "kubernetes":
        from mlflow.projects import kubernetes as kb
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "kubernetes")
        _validate_docker_env(project)
        _validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        image = _build_docker_image(
            work_dir=work_dir,
            repository_uri=kube_config["repository-uri"],
            base_image=project.docker_env.get('image'),
            run_id=active_run.info.run_id)
        image_digest = kb.push_image_to_registry(image.tags[0])
        submitted_run = kb.run_kubernetes_job(
            project.name, active_run, image.tags[0], image_digest,
            _get_entry_point_command(project, entry_point, parameters,
                                     storage_dir),
            _get_run_env_vars(run_id=active_run.info.run_uuid,
                              experiment_id=active_run.info.experiment_id),
            kube_config.get('kube-context', None),
            kube_config['kube-job-template'])
        return submitted_run

    supported_backends = ["local", "databricks", "kubernetes"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend_name, supported_backends))
Example No. 6
def _run(
    uri,
    experiment_id,
    entry_point,
    version,
    parameters,
    docker_args,
    backend_name,
    backend_config,
    use_conda,
    storage_dir,
    synchronous,
):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    backend_config[PROJECT_USE_CONDA] = use_conda
    backend_config[PROJECT_SYNCHRONOUS] = synchronous
    backend_config[PROJECT_DOCKER_ARGS] = docker_args
    backend_config[PROJECT_STORAGE_DIR] = storage_dir
    # TODO: remove this check once kubernetes execution has been refactored
    if backend_name not in {"databricks", "kubernetes"}:
        backend = loader.load_backend(backend_name)
        if backend:
            submitted_run = backend.run(
                uri,
                entry_point,
                parameters,
                version,
                backend_config,
                tracking_store_uri,
                experiment_id,
            )
            tracking.MlflowClient().set_tag(submitted_run.run_id,
                                            MLFLOW_PROJECT_BACKEND,
                                            backend_name)
            return submitted_run

    work_dir = fetch_and_validate_project(uri, version, entry_point,
                                          parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)

    active_run = get_or_create_run(None, uri, experiment_id, work_dir, version,
                                   entry_point, parameters)

    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "databricks")
        from mlflow.projects.databricks import run_databricks

        return run_databricks(
            remote_run=active_run,
            uri=uri,
            entry_point=entry_point,
            work_dir=work_dir,
            parameters=parameters,
            experiment_id=experiment_id,
            cluster_spec=backend_config,
        )

    elif backend_name == "kubernetes":
        from mlflow.projects.docker import (
            build_docker_image,
            validate_docker_env,
            validate_docker_installation,
        )
        from mlflow.projects import kubernetes as kb

        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id,
                                        MLFLOW_PROJECT_BACKEND, "kubernetes")
        validate_docker_env(project)
        validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        image = build_docker_image(
            work_dir=work_dir,
            repository_uri=kube_config["repository-uri"],
            base_image=project.docker_env.get("image"),
            run_id=active_run.info.run_id,
        )
        image_digest = kb.push_image_to_registry(image.tags[0])
        submitted_run = kb.run_kubernetes_job(
            project.name,
            active_run,
            image.tags[0],
            image_digest,
            get_entry_point_command(project, entry_point, parameters,
                                    storage_dir),
            get_run_env_vars(run_id=active_run.info.run_uuid,
                             experiment_id=active_run.info.experiment_id),
            kube_config.get("kube-context", None),
            kube_config["kube-job-template"],
        )
        return submitted_run

    supported_backends = ["databricks", "kubernetes"] + list(
        loader.MLFLOW_BACKENDS.keys())
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend_name, supported_backends))
Example No. 7
    def run(self, project_uri: str, entry_point: str, params: Dict,
            version: str, backend_config: Dict, tracking_uri: str,
            experiment_id: str) -> SubmittedRun:
        _logger.info('using yarn backend')
        _logger.info(locals())
        work_dir = fetch_and_validate_project(project_uri, version,
                                              entry_point, params)
        active_run = get_or_create_run(None, project_uri, experiment_id,
                                       work_dir, version, entry_point, params)
        _logger.info(f"run_id={active_run.info.run_id}")
        _logger.info(f"work_dir={work_dir}")
        project = load_project(work_dir)

        storage_dir = backend_config[PROJECT_STORAGE_DIR]

        entry_point_command = project.get_entry_point(entry_point).compute_command(
            params, storage_dir)

        _logger.info(f"entry_point_command={entry_point_command}")

        if project.conda_env_path:
            spec_file = project.conda_env_path
        else:
            spec_file = os.path.join(work_dir, "requirements.txt")
            if not os.path.exists(spec_file):
                raise ValueError(
                    f"no conda environment or requirements.txt found in {work_dir}")

        package_path = cluster_pack.upload_spec(spec_file)
        _logger.info(package_path)

        additional_files = []
        for file in os.listdir(work_dir):
            full_path = os.path.join(work_dir, file)
            if os.path.isfile(full_path):
                additional_files.append(full_path)

        entry_point, args = try_split_cmd(entry_point_command)

        _logger.info(f"args {entry_point} {args}")

        if "MLFLOW_YARN_TESTS" in os.environ:
            # we need to have a real tracking server setup to be able to push the run id here
            env = {"MLFLOW_TRACKING_URI": "file:/tmp/mlflow"}
        else:
            env = {
                "MLFLOW_RUN_ID": active_run.info.run_id,
                "MLFLOW_TRACKING_URI": mlflow.get_tracking_uri(),
                "MLFLOW_EXPERIMENT_ID": experiment_id
            }

        _backend_dict = _get_backend_dict(work_dir)
        # override the defaults with any values passed via --backend-config <json-config>
        for key in _backend_dict.keys():
            if key in backend_config:
                _backend_dict[key] = backend_config[key]

        _logger.info(f"backend config: {_backend_dict}")

        app_id = skein_launcher.submit(self._skein_client,
                                       module_name=entry_point,
                                       args=args,
                                       package_path=package_path,
                                       additional_files=additional_files,
                                       env_vars=env,
                                       process_logs=_upload_logs,
                                       **_backend_dict)

        MlflowClient().set_tag(active_run.info.run_id, "skein_application_id",
                               app_id)
        return YarnSubmittedRun(self._skein_client, app_id,
                                active_run.info.run_id)
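
Assuming this backend ships as a plugin registered under the name "yarn", a caller-side sketch; the backend_config keys below are illustrative overrides for whatever _get_backend_dict returns.

import mlflow

# Sketch: submit the project to a Hadoop/YARN cluster via the (assumed)
# "yarn" backend; keys are illustrative skein resource overrides
submitted_run = mlflow.projects.run(
    uri=".",
    backend="yarn",
    backend_config={"memory": "2 GiB", "vcores": 1},
)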
Example No. 8
    def run(
        self, project_uri, entry_point, params, version, backend_config, tracking_uri, experiment_id
    ):
        work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
        project = load_project(work_dir)
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
            run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        else:
            run_id = None
        active_run = get_or_create_run(
            run_id, project_uri, experiment_id, work_dir, version, entry_point, params
        )
        command_args = []
        command_separator = " "
        env_manager = backend_config[PROJECT_ENV_MANAGER]
        synchronous = backend_config[PROJECT_SYNCHRONOUS]
        docker_args = backend_config[PROJECT_DOCKER_ARGS]
        storage_dir = backend_config[PROJECT_STORAGE_DIR]

        # Select an appropriate env manager for the project env type
        if env_manager is None:
            env_manager = _env_type_to_env_manager(project.env_type)
        else:
            if project.env_type == env_type.PYTHON and env_manager == _EnvManager.CONDA:
                raise MlflowException.invalid_parameter_value(
                    "python_env project cannot be executed using conda. Set `--env-manager` to "
                    "'virtualenv' or 'local' to execute this project."
                )

        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            from mlflow.projects.docker import (
                validate_docker_env,
                validate_docker_installation,
                build_docker_image,
            )

            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
            validate_docker_env(project)
            validate_docker_installation()
            image = build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get("image"),
                run_id=active_run.info.run_id,
            )
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"),
            )
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif env_manager == _EnvManager.VIRTUALENV:
            tracking.MlflowClient().set_tag(
                active_run.info.run_id, MLFLOW_PROJECT_ENV, "virtualenv"
            )
            command_separator = " && "
            if project.env_type == env_type.CONDA:
                python_env = _PythonEnv.from_conda_yaml(project.env_config_path)
            else:
                python_env = _PythonEnv.from_yaml(project.env_config_path)
            python_bin_path = _install_python(python_env.python)
            env_root = _get_mlflow_virtualenv_root()
            work_dir_path = Path(work_dir)
            env_name = _get_virtualenv_name(python_env, work_dir_path)
            env_dir = Path(env_root).joinpath(env_name)
            activate_cmd = _create_virtualenv(work_dir_path, python_bin_path, env_dir, python_env)
            command_args += [activate_cmd]
        elif env_manager == _EnvManager.CONDA:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = get_or_create_conda_env(project.env_config_path)
            command_args += get_conda_command(conda_env_name)

        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += get_entry_point_command(project, entry_point, params, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(
                command_str, work_dir, experiment_id, run_id=active_run.info.run_id
            )
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir,
            entry_point=entry_point,
            parameters=params,
            experiment_id=experiment_id,
            env_manager=env_manager,
            docker_args=docker_args,
            storage_dir=storage_dir,
            run_id=active_run.info.run_id,
        )
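
A caller-side sketch for the env-manager selection above: leaving env_manager unset lets _env_type_to_env_manager derive one from the project's env type, while an explicit value forces a specific branch.

import mlflow

# Sketch: force the virtualenv branch shown above; omit env_manager to let
# the backend derive it from the project's env type instead
submitted_run = mlflow.projects.run(
    uri="https://github.com/mlflow/mlflow-example.git",
    env_manager="virtualenv",
)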