示例#1
0
    def _run_shell_command_job(self, project_uri, command, env_vars,
                               cluster_spec):
        """
        Run the specified shell command on a Databricks cluster.

        :param project_uri: URI of the project from which the shell command originates.
        :param command: Shell command to run.
        :param env_vars: Environment variables to set in the process running ``command``.
        :param cluster_spec: Dictionary containing a `Databricks cluster specification
                             <https://docs.databricks.com/dev-tools/api/latest/jobs.html#clusterspec>`_
                             or a `Databricks new cluster specification
                             <https://docs.databricks.com/dev-tools/api/latest/jobs.html#jobsclusterspecnewcluster>`_
                             to use when launching a run. If you specify libraries, this function
                             will add MLflow to the library list. This function does not support
                             installation of conda environment libraries on the workers.
        :return: ID of the Databricks job run. Can be used to query the run's status via the
                 Databricks
                 `Runs Get <https://docs.databricks.com/api/latest/jobs.html#runs-get>`_ API.
        """
        if is_release_version():
            # Pin the exact client version so the remote run matches the client.
            libraries = [{"pypi": {"package": "mlflow==%s" % VERSION}}]
        else:
            # When running a non-release version as the client, the same version will not be
            # available within Databricks, so constrain to the nearest published version.
            _logger.warning((
                "Your client is running a non-release version of MLflow. "
                "This version is not available on the Databricks runtime. "
                "MLflow will fall back to the MLflow version provided by the runtime. "
                "This might lead to unforeseen issues. "))
            libraries = [{"pypi": {"package": "'mlflow<=%s'" % VERSION}}]

        # Check syntax of JSON - if it contains libraries and new_cluster, pull those out
        if "new_cluster" in cluster_spec:
            # Libraries are optional, so we don't require that this be specified
            if "libraries" in cluster_spec:
                libraries.extend(cluster_spec["libraries"])
            cluster_spec = cluster_spec["new_cluster"]

        # Make jobs API request to launch run.
        req_body_json = {
            "run_name": "MLflow Run for %s" % project_uri,
            "new_cluster": cluster_spec,
            "shell_command_task": {
                "command": command,
                "env_vars": env_vars
            },
            "libraries": libraries,
        }
        _logger.info(
            "=== Submitting a run to execute the MLflow project... ===")
        run_submit_res = self._jobs_runs_submit(req_body_json)
        databricks_run_id = run_submit_res["run_id"]
        return databricks_run_id
示例#2
0
def test_is_release_version(monkeypatch):
    """Verify release detection: plain semver counts as a release, a .dev build does not."""
    cases = [("1.19.0", True), ("1.19.0.dev0", False)]
    for version_string, expect_release in cases:
        monkeypatch.setattr(version, "VERSION", version_string)
        if expect_release:
            assert version.is_release_version()
        else:
            assert not version.is_release_version()