def _run_shell_command_job(self, project_uri, command, env_vars, cluster_spec):
    """
    Run the specified shell command on a Databricks cluster.

    :param project_uri: URI of the project from which the shell command originates.
    :param command: Shell command to run.
    :param env_vars: Environment variables to set in the process running ``command``.
    :param cluster_spec: Dictionary containing a
        `Databricks cluster specification
        <https://docs.databricks.com/dev-tools/api/latest/jobs.html#clusterspec>`_ or a
        `Databricks new cluster specification
        <https://docs.databricks.com/dev-tools/api/latest/jobs.html#jobsclusterspecnewcluster>`_
        to use when launching a run. If you specify libraries, this function will add
        MLflow to the library list. This function does not support installation of conda
        environment libraries on the workers.
    :return: ID of the Databricks job run. Can be used to query the run's status via the
        Databricks
        `Runs Get <https://docs.databricks.com/api/latest/jobs.html#runs-get>`_ API.
    """
    if is_release_version():
        # Release clients can pin the exact matching MLflow version from PyPI.
        libraries = [{"pypi": {"package": "mlflow==%s" % VERSION}}]
    else:
        # A non-release (dev) client version is not published to PyPI, so the exact
        # version cannot be installed on the cluster; request the closest release
        # at or below it instead and warn the user about the mismatch.
        _logger.warning(
            "Your client is running a non-release version of MLflow. "
            "This version is not available on the Databricks runtime. "
            "MLflow will fall back to the MLflow version provided by the runtime. "
            "This might lead to unforeseen issues. ")
        libraries = [{"pypi": {"package": "'mlflow<=%s'" % VERSION}}]
    # Check syntax of JSON - if it contains libraries and new_cluster, pull those out
    if "new_cluster" in cluster_spec:
        # Libraries are optional, so we don't require that this be specified
        if "libraries" in cluster_spec:
            libraries.extend(cluster_spec["libraries"])
        cluster_spec = cluster_spec["new_cluster"]
    # Make jobs API request to launch run.
    req_body_json = {
        "run_name": "MLflow Run for %s" % project_uri,
        "new_cluster": cluster_spec,
        "shell_command_task": {
            "command": command,
            "env_vars": env_vars
        },
        "libraries": libraries,
    }
    _logger.info(
        "=== Submitting a run to execute the MLflow project... ===")
    run_submit_res = self._jobs_runs_submit(req_body_json)
    databricks_run_id = run_submit_res["run_id"]
    return databricks_run_id
def test_is_release_version(monkeypatch):
    """Only plain release version strings (no .dev suffix) count as releases."""
    cases = {
        "1.19.0": True,        # plain X.Y.Z -> release
        "1.19.0.dev0": False,  # dev suffix -> not a release
    }
    for version_string, is_release in cases.items():
        monkeypatch.setattr(version, "VERSION", version_string)
        assert bool(version.is_release_version()) == is_release