import os
import pathlib
import time
from typing import Any, List, Optional

import mlflow

# ApiClient and ClusterService are assumed to come from the databricks-cli SDK;
# the remaining helpers (dbx_echo, prepare_environment, handle_package,
# get_deployment_config, DeploymentFile, _verify_deployment, ApiV1Client,
# FileUploader, get_context_id, execute_command, get_package_file,
# _adjust_path, _walk_content) are project-internal and assumed to be
# importable from the surrounding dbx package.
from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.sdk.service import ClusterService


def _preprocess_cluster_args(
    api_client: ApiClient, cluster_name: Optional[str], cluster_id: Optional[str]
) -> str:
    """Resolve the target cluster id from either --cluster-name or --cluster-id."""
    cluster_service = ClusterService(api_client)

    if not cluster_name and not cluster_id:
        raise RuntimeError(
            "Parameters --cluster-name and --cluster-id cannot both be empty."
        )

    if cluster_name:
        # Fall back to an empty list if the workspace has no clusters at all
        existing_clusters = cluster_service.list_clusters().get("clusters", [])
        matching_clusters = [
            c for c in existing_clusters if c.get("cluster_name") == cluster_name
        ]

        if not matching_clusters:
            raise NameError(f"No clusters with name {cluster_name} found")
        if len(matching_clusters) > 1:
            raise NameError(
                f"Found more than one cluster with name {cluster_name}: {matching_clusters}"
            )

        cluster_id = matching_clusters[0]["cluster_id"]
    else:
        if not cluster_service.get_cluster(cluster_id):
            raise NameError(f"Cluster with id {cluster_id} not found")

    return cluster_id
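# Illustrative usage sketch (not part of the original module; the environment and
# cluster values are placeholder assumptions): the helper accepts either a cluster
# name or an explicit cluster id and always returns a cluster id.
#
#   api_client = prepare_environment("default")
#   cluster_id = _preprocess_cluster_args(api_client, "dev-cluster", None)       # by name
#   cluster_id = _preprocess_cluster_args(api_client, None, "some-cluster-id")   # by id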
def awake_cluster(cluster_service: ClusterService, cluster_id):
    """Poll the cluster state, starting the cluster if needed, until it is ready."""
    cluster_info = cluster_service.get_cluster(cluster_id)

    if cluster_info["state"] in ["RUNNING", "RESIZING"]:
        dbx_echo("Cluster is ready")
    if cluster_info["state"] in ["TERMINATED", "TERMINATING"]:
        dbx_echo("Dev cluster is terminated, starting it")
        cluster_service.start_cluster(cluster_id)
        time.sleep(5)
        awake_cluster(cluster_service, cluster_id)
    elif cluster_info["state"] == "ERROR":
        raise RuntimeError(
            "Cluster is misconfigured and cannot be started, please check the cluster settings first"
        )
    elif cluster_info["state"] in ["PENDING", "RESTARTING"]:
        dbx_echo(f'Cluster is getting prepared, current state: {cluster_info["state"]}')
        time.sleep(5)
        awake_cluster(cluster_service, cluster_id)
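# Note (an observation, not from the original source): awake_cluster re-invokes itself
# every 5 seconds while the cluster is starting. A sketch of an equivalent iterative
# loop, should recursion depth ever become a concern for very slow cluster starts;
# the function name awake_cluster_iterative is a hypothetical alternative, not part
# of the original module.
def awake_cluster_iterative(cluster_service: ClusterService, cluster_id) -> None:
    while True:
        state = cluster_service.get_cluster(cluster_id)["state"]
        if state in ["RUNNING", "RESIZING"]:
            dbx_echo("Cluster is ready")
            return
        if state in ["TERMINATED", "TERMINATING"]:
            dbx_echo("Dev cluster is terminated, starting it")
            cluster_service.start_cluster(cluster_id)
        elif state == "ERROR":
            raise RuntimeError(
                "Cluster is misconfigured and cannot be started, please check the cluster settings first"
            )
        elif state in ["PENDING", "RESTARTING"]:
            dbx_echo(f"Cluster is getting prepared, current state: {state}")
        time.sleep(5)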
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_package: bool,
    no_rebuild: bool,
):
    """Execute a job's entrypoint on an interactive cluster, tracking the run with MLflow."""
    api_client = prepare_environment(environment)
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} in environment {environment} on cluster {cluster_name} (id: {cluster_id})"
    )

    handle_package(no_rebuild)

    deployment = get_deployment_config(deployment_file).get_environment(environment)

    _verify_deployment(deployment, environment, deployment_file)

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            "Please add one under the spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"
            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(
                v1_client, cluster_id, context_id, installation_command, verbose=False
            )
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} was not found"
                + ", proceeding without any additional packages"
            )

        if not no_package:
            package_file = get_package_file()

            if not package_file:
                raise FileNotFoundError(
                    "Project package was not found. Please check that the /dist directory exists."
                )

            file_uploader.upload_file(package_file)
            localized_package_path = f"{localized_base_path}/{str(package_file.as_posix())}"

            dbx_echo("Installing package")
            installation_command = f"%pip install --force-reinstall {localized_package_path}"
            execute_command(
                v1_client, cluster_id, context_id, installation_command, verbose=False
            )
            dbx_echo("Package installation finished")
        else:
            dbx_echo(
                "Package was disabled via --no-package, only the code from the entrypoint will be used"
            )

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        dbx_echo("Processing parameters")
        task_props: List[Any] = job_payload.get("spark_python_task").get("parameters", [])

        if task_props:

            def adjustment_callback(p: Any):
                return _adjust_path(p, artifact_base_uri, file_uploader)

            _walk_content(adjustment_callback, task_props)

            task_props = ["python"] + task_props

            parameters_command = f"""
            import sys
            sys.argv = {task_props}
            """

            execute_command(
                v1_client, cluster_id, context_id, parameters_command, verbose=False
            )

        dbx_echo("Processing parameters - done")

        dbx_echo("Starting entrypoint file execution")
        execute_command(
            v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text()
        )
        dbx_echo("Command execution finished")
# Alternative variant of execute() without the --no-package option: the deployment file
# is read via DeploymentFile and the newest wheel found under dist/ is always installed.
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} with environment: {environment} on cluster: {cluster_id}"
    )

    handle_package(no_rebuild)

    deployment = DeploymentFile(deployment_file).get_environment(environment)

    if not deployment:
        raise NameError(
            f"Environment {environment} is not provided in deployment file {deployment_file}, "
            "please add this environment first"
        )

    env_jobs = deployment.get("jobs")
    if not env_jobs:
        raise RuntimeError(
            f"No jobs section found in environment {environment}, please check the deployment file"
        )

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            "Please add one under the spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = (
                f"{localized_base_path}/{str(requirements_fp)}"
            )
            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(
                v1_client, cluster_id, context_id, installation_command, verbose=False
            )
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} was not found"
                + ", proceeding without any additional packages"
            )

        project_package_path = list(pathlib.Path(".").rglob("dist/*.whl"))
        if not project_package_path:
            raise FileNotFoundError(
                "No package wheel found under the dist/ directory, please build the project package first"
            )

        # Make sure we always take the newest package (i.e. sort by time)
        project_package_path.sort(key=os.path.getctime, reverse=True)
        project_package_path = project_package_path[0]

        file_uploader.upload_file(project_package_path)
        localized_package_path = (
            f"{localized_base_path}/{str(project_package_path.as_posix())}"
        )

        dbx_echo("Installing package")
        installation_command = f"%pip install {localized_package_path} --force-reinstall"
        execute_command(
            v1_client, cluster_id, context_id, installation_command, verbose=False
        )
        dbx_echo("Package installation finished")

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        dbx_echo("Starting entrypoint file execution")
        execute_command(
            v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text()
        )
        dbx_echo("Command execution finished")
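# Illustrative usage sketch (an assumption, not part of the original module): how this
# variant of execute() could be invoked directly; every argument value below is a
# hypothetical placeholder for a real workspace, cluster, and deployment file.
if __name__ == "__main__":
    execute(
        environment="default",                    # environment name defined in the deployment file
        cluster_id=None,                          # resolved from cluster_name below
        cluster_name="dev-cluster",               # hypothetical interactive cluster name
        job="sample-job",                         # hypothetical job name from the deployment file
        deployment_file="conf/deployment.json",   # hypothetical deployment file path
        requirements_file="requirements.txt",     # hypothetical requirements path
        no_rebuild=False,
    )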