def _maybe_set_run_terminated(active_run, status):
    """
    If the passed-in active run is defined and still running (i.e. hasn't
    already been terminated within user code), mark it as terminated with the
    passed-in status.
    """
    if active_run is None:
        return
    run_id = active_run.info.run_uuid
    cur_status = tracking.get_service().get_run(run_id).info.status
    if RunStatus.is_terminated(cur_status):
        return
    tracking.get_service().set_terminated(run_id, status)
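# A minimal usage sketch (hypothetical, built only from APIs that appear in
# these snippets): when user code has already ended the run, the helper above
# sees a terminated status on the tracking server and becomes a no-op.
import mlflow
from mlflow import tracking

active_run = mlflow.start_run()
mlflow.end_run()  # user code terminates the run itself ...
_maybe_set_run_terminated(active_run, "FAILED")  # ... so this call changes nothing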
def _print_description_and_log_tags(self):
    eprint("=== Launched MLflow run as Databricks job run with ID %s. Getting run status "
           "page URL... ===" % self._databricks_run_id)
    run_info = self._job_runner.jobs_runs_get(self._databricks_run_id)
    jobs_page_url = run_info["run_page_url"]
    eprint("=== Check the run's status at %s ===" % jobs_page_url)
    host_creds = databricks_utils.get_databricks_host_creds(
        self._job_runner.databricks_profile)
    tracking.get_service().set_tag(self._mlflow_run_id,
                                   MLFLOW_DATABRICKS_RUN_URL, jobs_page_url)
    tracking.get_service().set_tag(self._mlflow_run_id,
                                   MLFLOW_DATABRICKS_SHELL_JOB_RUN_ID,
                                   self._databricks_run_id)
    tracking.get_service().set_tag(self._mlflow_run_id,
                                   MLFLOW_DATABRICKS_WEBAPP_URL, host_creds.host)
    job_id = run_info.get('job_id')
    # Some Databricks releases do not include the job ID in the runs-get
    # response; it is returned in Databricks releases 2.80 and above.
    if job_id is not None:
        tracking.get_service().set_tag(self._mlflow_run_id,
                                       MLFLOW_DATABRICKS_SHELL_JOB_ID, job_id)
def test_log_metric_validation():
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        active_run = start_run()
        run_uuid = active_run.info.run_uuid
        with active_run:
            # "apple" is not a number, so the metric should be rejected
            mlflow.log_metric("name_1", "apple")
        finished_run = tracking.get_service().get_run(run_uuid)
        assert len(finished_run.data.metrics) == 0
    finally:
        tracking.set_tracking_uri(None)
def _create_run(uri, experiment_id, work_dir, entry_point, parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging
    metadata (e.g. the URI, entry point, and parameters of the project) about
    the run. Return an ``ActiveRun`` that can be used to report additional
    data about the run (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    active_run = tracking.get_service().create_run(
        experiment_id=experiment_id,
        source_name=source_name,
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    if parameters is not None:
        for key, value in parameters.items():
            tracking.get_service().log_param(active_run.info.run_uuid, key, value)
    return active_run
def test_start_and_end_run():
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        # Use the start_run() and end_run() APIs without a `with` block, and
        # verify that they work.
        active_run = start_run()
        mlflow.log_metric("name_1", 25)
        end_run()
        finished_run = tracking.get_service().get_run(active_run.info.run_uuid)
        # Validate metrics
        assert len(finished_run.data.metrics) == 1
        expected_pairs = {"name_1": 25}
        for metric in finished_run.data.metrics:
            assert expected_pairs[metric.key] == metric.value
    finally:
        tracking.set_tracking_uri(None)
def test_start_run_context_manager():
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        first_run = start_run()
        first_uuid = first_run.info.run_uuid
        with first_run:
            # Check that start_run() causes the run information to be
            # persisted in the store
            persisted_run = tracking.get_service().get_run(first_uuid)
            assert persisted_run is not None
            assert persisted_run.info == first_run.info
        finished_run = tracking.get_service().get_run(first_uuid)
        assert finished_run.info.status == RunStatus.FINISHED
        # Launch a separate run that fails, verify the run status is FAILED
        # and the run UUID is different
        second_run = start_run()
        assert second_run.info.run_uuid != first_uuid
        with pytest.raises(Exception):
            with second_run:
                raise Exception("Failing run!")
        finished_run2 = tracking.get_service().get_run(second_run.info.run_uuid)
        assert finished_run2.info.status == RunStatus.FAILED
    finally:
        tracking.set_tracking_uri(None)
def test_log_metric():
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        active_run = start_run()
        run_uuid = active_run.info.run_uuid
        with active_run:
            mlflow.log_metric("name_1", 25)
            mlflow.log_metric("name_2", -3)
            mlflow.log_metric("name_1", 30)
            mlflow.log_metric("nested/nested/name", 40)
        finished_run = tracking.get_service().get_run(run_uuid)
        # Validate metrics: the latest value logged for "name_1" wins
        assert len(finished_run.data.metrics) == 3
        expected_pairs = {"name_1": 30, "name_2": -3, "nested/nested/name": 40}
        for metric in finished_run.data.metrics:
            assert expected_pairs[metric.key] == metric.value
    finally:
        tracking.set_tracking_uri(None)
def _wait_for(submitted_run_obj):
    """Wait on the passed-in submitted run, reporting its status to the tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the
    # tracking server if we're interrupted before we reach the try block below
    try:
        active_run = tracking.get_service().get_run(run_id) if run_id is not None else None
        if submitted_run_obj.wait():
            eprint("=== Run (ID '%s') succeeded ===" % run_id)
            _maybe_set_run_terminated(active_run, "FINISHED")
        else:
            _maybe_set_run_terminated(active_run, "FAILED")
            raise ExecutionException("Run (ID '%s') failed" % run_id)
    except KeyboardInterrupt:
        eprint("=== Run (ID '%s') interrupted, cancelling run ===" % run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, "FAILED")
        raise
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None,
         use_conda=True, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the
    passed-in mode. Returns a ``SubmittedRun`` corresponding to the project run.
    """
    exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = tracking.get_service().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point, parameters)
    if mode == "databricks":
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run, uri=uri, entry_point=entry_point, work_dir=work_dir,
            parameters=parameters, experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        # Synchronously create a conda environment (even though this may take
        # some time) to avoid failures due to multiple concurrent attempts to
        # create the same conda env.
        conda_env_name = _get_or_create_conda_env(project.conda_env_path) if use_conda else None
        # In blocking mode, run the entry point command in blocking fashion,
        # sending status updates to the tracking server when finished. Note
        # that the run state may not be persisted to the tracking server if
        # interrupted
        if block:
            command = _get_entry_point_command(
                project, entry_point, parameters, conda_env_name, storage_dir)
            return _run_entry_point(command, work_dir, exp_id,
                                    run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
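# A hedged usage sketch tying the two helpers above together (the URI and
# parameter values are illustrative assumptions): launch a local project run
# in a subprocess via _run(), then block on it with _wait_for(), which reports
# the final FINISHED/FAILED status to the tracking server.
submitted_run = _run(uri="examples/my_project", entry_point="main",
                     parameters={"alpha": "0.5"}, mode="local", block=False)
_wait_for(submitted_run)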
def run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                   git_username, git_password):
    """
    Runs the project at the specified URI on Databricks, returning a
    ``SubmittedRun`` that can be used to query the run's status or wait for
    the resulting Databricks Job run to terminate.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)
    work_dir = _fetch_and_clean_project(uri=uri, version=version,
                                        git_username=git_username,
                                        git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)
    remote_run = tracking.get_service().create_run(
        experiment_id=experiment_id, source_name=_expand_uri(uri),
        source_version=_get_git_commit(work_dir), entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    env_vars = {
        tracking._TRACKING_URI_ENV_VAR: tracking_uri,
        tracking._EXPERIMENT_ID_ENV_VAR: experiment_id,
    }
    run_id = remote_run.info.run_uuid
    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))
    # Launch run on Databricks
    with open(cluster_spec, 'r') as handle:
        try:
            cluster_spec = json.load(handle)
        except ValueError:
            eprint("Error when attempting to load and parse JSON cluster spec from file "
                   "%s. " % cluster_spec)
            raise
    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, cluster_spec)
    return DatabricksSubmittedRun(db_run_id, run_id)
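# A hedged example of the kind of JSON cluster spec run_databricks() parses
# from the `cluster_spec` file. The field values are illustrative; the field
# names follow the Databricks Jobs API "new_cluster" schema.
import json

example_cluster_spec = {
    "spark_version": "4.0.x-scala2.11",
    "node_type_id": "r3.xlarge",
    "num_workers": 1,
}
with open("cluster_spec.json", "w") as f:
    json.dump(example_cluster_spec, f)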
def test_log_param():
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        active_run = start_run()
        run_uuid = active_run.info.run_uuid
        with active_run:
            mlflow.log_param("name_1", "a")
            mlflow.log_param("name_2", "b")
            mlflow.log_param("name_1", "c")
            mlflow.log_param("nested/nested/name", 5)
        finished_run = tracking.get_service().get_run(run_uuid)
        # Validate params: the latest value logged for "name_1" wins, and the
        # integer 5 is stored as the string "5"
        assert len(finished_run.data.params) == 3
        expected_pairs = {"name_1": "c", "name_2": "b", "nested/nested/name": "5"}
        for param in finished_run.data.params:
            assert expected_pairs[param.key] == param.value
    finally:
        tracking.set_tracking_uri(None)
# pytest fixture that patches the tracking service used by the Databricks
# project backend and yields the mocked set_tag so tests can assert on it
@pytest.fixture()
def set_tag_mock():
    with mock.patch("mlflow.projects.databricks.tracking.get_service") as m:
        mlflow_service_mock = mock.Mock(wraps=get_service())
        m.return_value = mlflow_service_mock
        yield mlflow_service_mock.set_tag
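# A hedged sketch (hypothetical test, not from the MLflow suite) of how the
# fixture above is consumed: exercise code that internally calls
# tracking.get_service().set_tag(...), then assert on the recorded calls.
# The test body is a placeholder.
def test_databricks_tags_are_logged(set_tag_mock):
    ...  # exercise code under test that sets Databricks run tags
    set_tag_mock.assert_any_call(mock.ANY, MLFLOW_DATABRICKS_RUN_URL, mock.ANY)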
""" import sys sys.path.append('../') from models.metrics import metrics_regression import click import warnings import mlflow import mlflow.sklearn # # available experiments from mlflow.tracking import get_service # get service service = get_service() # returns a list of mlflow.entities.Experiment experiments = service.list_experiments() # collect experiments information lidexp = list() didexp = dict() for ii, iexp in enumerate(experiments): lidexp.append(iexp.experiment_id) didexp[iexp.experiment_id] = {'name': iexp.name, 'location': iexp.artifact_location} # # ARGUMENTS: GENERAL from experiments.arguments import * # # LOAD DATA