Example #1
File: deploy.py Project: jspreddy/dbx
def _create_jobs(jobs: List[Dict[str, Any]],
                 api_client: ApiClient) -> Dict[str, int]:
    deployment_data = {}
    for job in jobs:
        dbx_echo(f'Processing deployment for job: {job["name"]}')
        jobs_service = JobsService(api_client)
        all_jobs = jobs_service.list_jobs().get("jobs", [])
        matching_jobs = [
            j for j in all_jobs if j["settings"]["name"] == job["name"]
        ]

        if not matching_jobs:
            job_id = _create_job(api_client, job)
        else:

            if len(matching_jobs) > 1:
                raise Exception(
                    f"""There are more than one jobs with name {job["name"]}.
                Please delete duplicated jobs first""")

            job_id = matching_jobs[0]["job_id"]
            _update_job(jobs_service, job_id, job)

        deployment_data[job["name"]] = job_id
    return deployment_data
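
A minimal usage sketch for _create_jobs, assuming an ApiClient configured with a hypothetical host and token (the job dicts mirror the Jobs API 2.0 settings payload; all values below are illustrative):

from databricks_cli.sdk.api_client import ApiClient

# hypothetical credentials; in dbx these come from the environment config
client = ApiClient(host="https://<workspace-url>", token="<personal-access-token>")

# each entry follows the Jobs API job-settings payload
jobs = [
    {
        "name": "my-etl-job",
        "new_cluster": {
            "spark_version": "7.3.x-scala2.12",
            "node_type_id": "i3.xlarge",
            "num_workers": 2,
        },
        "spark_python_task": {"python_file": "dbfs:/pipelines/etl.py"},
    }
]

deployment_data = _create_jobs(jobs, client)  # e.g. {"my-etl-job": 42}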
Example #2
File: deploy.py Project: jspreddy/dbx
def _update_job(jobs_service: JobsService, job_id: str, job: Dict[str,
                                                                  Any]) -> str:
    dbx_echo(
        f'Updating existing job with id: {job_id} and name: {job["name"]}')
    try:
        jobs_service.reset_job(job_id, job)
    except HTTPError as e:
        dbx_echo("Failed to update job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id
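
Note that reset_job maps to the Jobs API jobs/reset endpoint, which overwrites the job settings wholesale rather than patching them, so the job dict must carry the complete desired definition. Reusing the illustrative client and jobs from the sketch above:

from databricks_cli.sdk.service import JobsService

# jobs/reset replaces the full settings object (unlike jobs/update, which patches),
# so the dict passed here must be the complete job definition
jobs_service = JobsService(client)
_update_job(jobs_service, job_id="42", job=jobs[0])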
Example #3
    def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
        dbx_echo("Launching job via run now API")
        jobs_service = JobsService(self.api_client)

        all_jobs = jobs_service.list_jobs().get("jobs", [])

        matching_jobs = [
            j for j in all_jobs if j["settings"]["name"] == self.job
        ]

        if not matching_jobs:
            raise Exception(f"Job with name {self.job} not found")

        if len(matching_jobs) > 1:
            raise Exception(
                f"Job with name {self.job} is duplicated. Please make job name unique."
            )

        job_data = matching_jobs[0]
        job_id = job_data["job_id"]

        active_runs = jobs_service.list_runs(job_id,
                                             active_only=True).get("runs", [])

        for run in active_runs:
            if self.existing_runs == "pass":
                dbx_echo("Passing the existing runs status check")

            if self.existing_runs == "wait":
                dbx_echo(
                    f'Waiting for job run with id {run["run_id"]} to finish'
                )
                _wait_run(self.api_client, run)

            if self.existing_runs == "cancel":
                dbx_echo(f'Cancelling run with id {run["run_id"]}')
                _cancel_run(self.api_client, run)

        if self.prepared_parameters:
            dbx_echo(
                f"Default launch parameters are overridden with the following: {self.prepared_parameters}"
            )
            # no null-check is needed here: the job's existence was already verified by the listing above
            job_settings = job_data.get("settings")

            # determine the job's task type so the parameters are passed under the correct key
            extra_payload_key = _define_payload_key(job_settings)

            extra_payload = {extra_payload_key: self.prepared_parameters}

            run_data = jobs_service.run_now(job_id, **extra_payload)

        else:
            run_data = jobs_service.run_now(job_id)

        return run_data, job_id
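
The _define_payload_key helper is not shown in this excerpt. A plausible reconstruction, assuming it picks the run-now parameter field that matches the job's task type (these field names are the ones the Jobs API run-now endpoint accepts):

from typing import Any, Dict

def _define_payload_key(job_settings: Dict[str, Any]) -> str:
    """Pick the run-now parameter field matching the job's task type.

    Hypothetical sketch: the real helper lives elsewhere in deploy.py.
    """
    if "notebook_task" in job_settings:
        return "notebook_params"  # expects a dict of named parameters
    if "spark_jar_task" in job_settings:
        return "jar_params"  # expects a positional list
    if "spark_submit_task" in job_settings:
        return "spark_submit_params"
    return "python_params"  # default: spark_python_task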
Example #4
    def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
        dbx_echo("Launching job via run now API")
        jobs_service = JobsService(self.api_client)

        all_jobs = jobs_service.list_jobs().get("jobs", [])

        matching_jobs = [
            j for j in all_jobs if j["settings"]["name"] == self.job
        ]

        if not matching_jobs:
            raise Exception(f"Job with name {self.job} not found")

        if len(matching_jobs) > 1:
            raise Exception(
                f"Job with name {self.job} is duplicated. Please make job name unique."
            )

        job_id = matching_jobs[0]["job_id"]

        active_runs = jobs_service.list_runs(job_id,
                                             active_only=True).get("runs", [])

        for run in active_runs:
            if self.existing_runs == "pass":
                dbx_echo("Passing the existing runs status check")

            if self.existing_runs == "wait":
                dbx_echo(
                    f'Waiting for job run with id {run["run_id"]} to finish'
                )
                _wait_run(self.api_client, run)

            if self.existing_runs == "cancel":
                dbx_echo(f'Cancelling run with id {run["run_id"]}')
                _cancel_run(self.api_client, run)

        if self.override_parameters:
            _prepared_parameters = sum(
                [[k, v] for k, v in self.override_parameters.items()], [])
            dbx_echo(
                f"Default launch parameters are overridden with the following: {_prepared_parameters}"
            )
            run_data = jobs_service.run_now(job_id,
                                            python_params=_prepared_parameters)
        else:
            run_data = jobs_service.run_now(job_id)

        return run_data, job_id
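
The sum(..., []) idiom flattens the override dict into the alternating [key, value, ...] list that python_params expects:

override_parameters = {"--conf-file": "conf/test.json", "--env": "test"}

# sum() with an empty-list start value concatenates the [k, v] pairs in order
prepared = sum([[k, v] for k, v in override_parameters.items()], [])
print(prepared)  # ['--conf-file', 'conf/test.json', '--env', 'test']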
Example #5
def launch(
    environment: str,
    job: str,
    trace: bool,
    kill_on_sigterm: bool,
    existing_runs: str,
    as_run_submit: bool,
    tags: List[str],
    parameters: List[str],
    parameters_raw: Optional[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)

    if parameters_raw:
        prepared_parameters = parameters_raw
    else:
        override_parameters = parse_multiple(parameters)
        prepared_parameters = sum([[k, v]
                                   for k, v in override_parameters.items()],
                                  [])

    filter_string = generate_filter_string(environment)

    run_info = _find_deployment_run(filter_string, additional_tags,
                                    as_run_submit, environment)

    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:

        with mlflow.start_run(nested=True):
            artifact_base_uri = deployment_run.info.artifact_uri

            if not as_run_submit:
                run_launcher = RunNowLauncher(job, api_client,
                                              artifact_base_uri, existing_runs,
                                              prepared_parameters)
            else:
                run_launcher = RunSubmitLauncher(job, api_client,
                                                 artifact_base_uri,
                                                 existing_runs,
                                                 prepared_parameters,
                                                 environment)

            run_data, job_id = run_launcher.launch()

            jobs_service = JobsService(api_client)
            run_info = jobs_service.get_run(run_data["run_id"])
            run_url = run_info.get("run_page_url")
            dbx_echo(f"Run URL: {run_url}")
            if trace:
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked run failed during execution. Please check Databricks UI for run logs"
                    )
                dbx_echo("Launch command finished")

            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Run successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data.get("run_id"),
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)
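
parse_multiple is not shown in this excerpt; a minimal sketch of what it presumably does, turning repeated key=value CLI options into a dict (hypothetical reconstruction):

from typing import Dict, List

def parse_multiple(items: List[str]) -> Dict[str, str]:
    """Parse ['key1=value1', 'key2=value2'] into a dict (illustrative sketch)."""
    return dict(item.split("=", 1) for item in items)

parse_multiple(["owner=data-team", "--env=test"])
# {'owner': 'data-team', '--env': 'test'}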
Example #6
def _get_run_status(api_client: ApiClient,
                    run_data: Dict[str, Any]) -> Dict[str, Any]:
    jobs_service = JobsService(api_client)
    run_status = jobs_service.get_run(run_data["run_id"])
    return run_status
Example #7
def _cancel_run(api_client: ApiClient, run_data: Dict[str, Any]):
    jobs_service = JobsService(api_client)
    jobs_service.cancel_run(run_data["run_id"])
    _wait_run(api_client, run_data)
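
cancel_run is asynchronous on the Databricks side, which is why _cancel_run follows it with _wait_run. That helper is not shown here; a plausible polling sketch built on the life_cycle_state field that the Runs API returns:

import time
from typing import Any, Dict

from databricks_cli.sdk.api_client import ApiClient

TERMINAL_STATES = {"TERMINATED", "SKIPPED", "INTERNAL_ERROR"}

def _wait_run(api_client: ApiClient, run_data: Dict[str, Any]) -> Dict[str, Any]:
    """Poll the run until it reaches a terminal state (hypothetical sketch)."""
    while True:
        status = _get_run_status(api_client, run_data)
        if status["state"]["life_cycle_state"] in TERMINAL_STATES:
            return status
        time.sleep(5)  # polling interval is an assumption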
Example #8
def launch(
        environment: str,
        job: str,
        trace: bool,
        kill_on_sigterm: bool,
        existing_runs: str,
        tags: List[str],
        parameters: List[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    override_parameters = parse_multiple(parameters)

    filter_string = generate_filter_string(environment, additional_tags)

    runs = mlflow.search_runs(filter_string=filter_string, max_results=1)

    if runs.empty:
        raise EnvironmentError(
            f"""
        No runs found for the given set of filters:
            {filter_string}
        Please check experiment UI to verify current status of deployments.
        """
        )

    run_info = runs.iloc[0].to_dict()

    dbx_echo("Successfully found deployment per given job name")

    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):

            artifact_base_uri = deployment_run.info.artifact_uri
            deployments = _load_deployments(api_client, artifact_base_uri)
            job_id = deployments.get(job)

            if not job_id:
                raise Exception(
                    f"Job with name {job} not found in the latest deployment" % job
                )

            jobs_service = JobsService(api_client)
            active_runs = jobs_service.list_runs(job_id, active_only=True).get(
                "runs", []
            )

            for run in active_runs:
                if existing_runs == "pass":
                    dbx_echo("Passing the existing runs status check")

                if existing_runs == "wait":
                    dbx_echo(
                        f'Waiting for job run with id {run["run_id"]} to finish'
                    )
                    _wait_run(api_client, run)

                if existing_runs == "cancel":
                    dbx_echo(f'Cancelling run with id {run["run_id"]}')
                    _cancel_run(api_client, run)

            if override_parameters:
                _prepared_parameters = sum(
                    [[k, v] for k, v in override_parameters.items()], []
                )
                dbx_echo(
                    f"Default launch parameters are overridden with the following: {_prepared_parameters}"
                )
                run_data = jobs_service.run_now(
                    job_id, python_params=_prepared_parameters
                )
            else:
                run_data = jobs_service.run_now(job_id)

            if trace:
                dbx_echo("Tracing job run")
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the job run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked job failed during execution. "
                        "Please check Databricks UI for job logs"
                    )
                dbx_echo("Launch command finished")

            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Job successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data["run_id"],
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)
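
Assuming the usual click wiring (the decorators are stripped from this excerpt), the command body could be exercised directly like this; all argument values are illustrative:

# hypothetical direct call; in practice click supplies these from the CLI
launch(
    environment="default",
    job="my-etl-job",
    trace=True,
    kill_on_sigterm=True,
    existing_runs="wait",
    tags=["owner=data-team"],
    parameters=["--conf-file=conf/test.json"],
)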