Exemplo n.º 1
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """Persist a new active experiment under the store root and return its id."""
     self._check_root_dir()
     exp_id_str = str(experiment_id)
     experiment_dir = mkdir(self.root_directory, exp_id_str)
     # Default the artifact location to one derived from the artifact root.
     if not artifact_uri:
         artifact_uri = build_path(self.artifact_root_uri, exp_id_str)
     new_experiment = Experiment(experiment_id, name, artifact_uri,
                                 LifecycleStage.ACTIVE)
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME,
                dict(new_experiment))
     return experiment_id
Exemplo n.º 2
0
def test_message_to_json():
    """message_to_json should serialize an Experiment proto as a JSON object."""
    experiment = Experiment(123, "name", "arty", 'active')
    serialized = message_to_json(experiment.to_proto())
    expected = {
        "experiment_id": "123",
        "name": "name",
        "artifact_location": "arty",
        "lifecycle_stage": 'active',
    }
    assert json.loads(serialized) == expected
Exemplo n.º 3
0
    def get_experiment(self, experiment_id):
        """
        Fetches the experiment from the backend store.

        :param experiment_id: Integer id for the experiment
        :return: A single Experiment object if it exists, otherwise raises an Exception.
        """
        request = GetExperiment(experiment_id=experiment_id)
        proto_response = self._call_endpoint(GetExperiment, message_to_json(request))
        return Experiment.from_proto(proto_response.experiment)
Exemplo n.º 4
0
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Experiment`.
        """
        # experiment_id is stored numerically in the DB; the entity expects a string.
        fields = {
            "experiment_id": str(self.experiment_id),
            "name": self.name,
            "artifact_location": self.artifact_location,
            "lifecycle_stage": self.lifecycle_stage,
        }
        return Experiment(**fields)
Exemplo n.º 5
0
 def test_string_repr(self):
     """str() of an Experiment lists its fields alphabetically with empty tags."""
     experiment = Experiment(
         experiment_id=0,
         name="myname",
         artifact_location="hi",
         lifecycle_stage=LifecycleStage.ACTIVE,
     )
     expected = ("<Experiment: artifact_location='hi', experiment_id=0, "
                 "lifecycle_stage='active', name='myname', tags={}>")
     assert str(experiment) == expected
Exemplo n.º 6
0
 def list_experiments(self, view_type=ViewType.ACTIVE_ONLY):
     """
     :return: a list of all known Experiment objects
     """
     request_body = message_to_json(ListExperiments(view_type=view_type))
     proto_response = self._call_endpoint(ListExperiments, request_body)
     return list(map(Experiment.from_proto, proto_response.experiments))
Exemplo n.º 7
0
    def test_creation_and_hydration(self):
        """Round-trip an Experiment through dict and proto representations."""
        exp_id = random_int()
        name = "exp_%d_%d" % (random_int(), random_int())
        lifecycle_stage = Experiment.ACTIVE_LIFECYCLE
        location = random_file(".json")

        exp = Experiment(exp_id, name, location, lifecycle_stage)
        self._check(exp, exp_id, name, location, lifecycle_stage)

        as_dict = {
            "experiment_id": exp_id,
            "name": name,
            "artifact_location": location,
            "lifecycle_stage": lifecycle_stage,
        }
        self.assertEqual(dict(exp), as_dict)

        # proto round-trip preserves all fields
        hydrated_from_proto = Experiment.from_proto(exp.to_proto())
        self._check(hydrated_from_proto, exp_id, name, location, lifecycle_stage)

        # dictionary round-trip preserves all fields
        hydrated_from_dict = Experiment.from_dictionary(as_dict)
        self._check(hydrated_from_dict, exp_id, name, location, lifecycle_stage)
Exemplo n.º 8
0
def test_resolve_experiment_id(experiment_id, experiment_name, expected):
    """_resolve_experiment_id should honor both name- and id-based lookup."""
    patcher = mock.patch('mlflow.tracking.MlflowClient.get_experiment_by_name')
    with patcher as get_experiment_by_name_mock:
        get_experiment_by_name_mock.return_value = Experiment(
            experiment_id=33, name='Name', artifact_location=None,
            lifecycle_stage=None)

        resolved = mlflow.projects._resolve_experiment_id(
            experiment_name=experiment_name, experiment_id=experiment_id)
        assert resolved == expected
Exemplo n.º 9
0
    def get_experiment(self, experiment_id):
        """
        Fetch the experiment from the backend store.

        :param experiment_id: String id for the experiment

        :return: A single :py:class:`mlflow.entities.Experiment` object if it exists,
        otherwise raises an Exception.
        """
        # The endpoint expects the id as a string; coerce defensively.
        request = GetExperiment(experiment_id=str(experiment_id))
        proto_response = self._call_endpoint(GetExperiment, message_to_json(request))
        return Experiment.from_proto(proto_response.experiment)
Exemplo n.º 10
0
 def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
     """Load experiment metadata from disk, deriving lifecycle stage from its location."""
     self._check_root_dir()
     experiment_dir = self._get_experiment_path(experiment_id, view_type)
     if experiment_dir is None:
         raise Exception("Could not find experiment with ID %s" %
                         experiment_id)
     meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
     # Experiments that live under the trash folder are soft-deleted.
     in_trash = experiment_dir.startswith(self.trash_folder)
     meta['lifecycle_stage'] = (Experiment.DELETED_LIFECYCLE if in_trash
                                else Experiment.ACTIVE_LIFECYCLE)
     return Experiment.from_dictionary(meta)
Exemplo n.º 11
0
def _dict_to_experiment(experiment_dict):
    """Build an Experiment entity from a stored dict, upgrading legacy fields."""
    normalized = experiment_dict.copy()

    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy experiments
    exp_id = normalized["experiment_id"]
    if isinstance(exp_id, int):
        normalized["experiment_id"] = str(exp_id)
    # Convert the raw key/value tag mapping into ExperimentTag entities.
    if "tags" in normalized:
        normalized["tags"] = [
            ExperimentTag(key, value)
            for key, value in normalized["tags"].items()
        ]
    return Experiment.from_dictionary(normalized)
Exemplo n.º 12
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """Create the on-disk layout for a new active experiment and return its id."""
     if not artifact_uri:
         artifact_uri = posixpath.join(self.artifact_root_uri,
                                       str(experiment_id))
     self._check_root_dir()
     meta_dir = mkdir(self.root_directory, str(experiment_id))
     experiment = Experiment(experiment_id, name, artifact_uri,
                             LifecycleStage.ACTIVE)
     meta = dict(experiment)
     # tags are added to the file system and are not written to this dict on
     # write; as such, they must not appear in the meta file.
     meta.pop('tags')
     write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, meta)
     return experiment_id
Exemplo n.º 13
0
    def test_deploy_with_requirements_and_branch(self, *_):
        """Deploy with an explicit requirements file and branch name: the run
        should succeed and report exactly one pyspark dependency deletion."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            # The pyspark entry is expected to be filtered out during deploy
            # (asserted on stdout below).
            sample_requirements = "\n".join(
                ["pyspark=3.0.0", "xgboost=0.6.0", "pyspark3d"])

            pathlib.Path("runtime_requirements.txt").write_text(
                sample_requirements)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(
                    deploy,
                    [
                        "--environment",
                        "test",
                        "--requirements-file",
                        "runtime_requirements.txt",
                        "--branch-name",
                        "test-branch",
                    ],
                )

                # Exactly one log line should report the pyspark deletion.
                deleted_dependency_lines = [
                    line for line in deploy_result.stdout.splitlines()
                    if "pyspark dependency deleted" in line
                ]
                self.assertEqual(len(deleted_dependency_lines), 1)

                self.assertEqual(deploy_result.exit_code, 0)
Exemplo n.º 14
0
    def test_no_runs(self, *_):
        """Launching a job when no deployment run exists in the MLflow
        experiment should fail with a descriptive error, while the deploy
        itself succeeds."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"dbfs": {}, "jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(
                    deploy,
                    ["--environment", "test", "--tags", "cake=cheesecake"])

                self.assertEqual(deploy_result.exit_code, 0)

                # The launch targets a job name that was never deployed, so an
                # error is the expected outcome here.
                launch_result = invoke_cli_runner(
                    launch,
                    [
                        "--environment",
                        "test",
                        "--job",
                        "sample",
                        "--tags",
                        "cake=cheesecake",
                    ],
                    expected_error=True,
                )

                self.assertIsNotNone(launch_result.exception)
                self.assertTrue("not found in underlying MLflow experiment" in
                                str(launch_result.exception))
Exemplo n.º 15
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri, tags):
     """Create a new active experiment on disk, apply any tags, return its id."""
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri,
                                           str(experiment_id))
     self._check_root_dir()
     meta_dir = mkdir(self.root_directory, str(experiment_id))
     experiment = Experiment(experiment_id, name, artifact_uri,
                             LifecycleStage.ACTIVE)
     meta = dict(experiment)
     # tags are added to the file system and are not written to this dict on
     # write; as such, they must not appear in the meta file.
     meta.pop("tags")
     write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, meta)
     for tag in (tags if tags is not None else []):
         self.set_experiment_tag(experiment_id, tag)
     return experiment_id
Exemplo n.º 16
0
    def test_with_permissions(self, *_):
        """Deploy should succeed when the job definition carries a
        ``permissions.access_control_list`` block."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "default",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_file = pathlib.Path(DEFAULT_DEPLOYMENT_FILE_PATH)
            deploy_content = json.loads(deployment_file.read_text())

            # Augment the first job of the "default" environment with an ACL.
            sample_job = deploy_content.get("default").get("jobs")[0]

            sample_job["permissions"] = {
                "access_control_list": [
                    {
                        "user_name": "*****@*****.**",
                        "permission_level": "IS_OWNER",
                    },
                    {
                        "group_name": "some-user-group",
                        "permission_level": "CAN_VIEW"
                    },
                ]
            }

            deployment_file.write_text(json.dumps(deploy_content, indent=4))

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(deploy,
                                                  ["--environment", "default"])

                self.assertEqual(deploy_result.exit_code, 0)
Exemplo n.º 17
0
 def get_experiment_by_name(self, experiment_name):
     """
     Fetch an experiment by name from the tracking server.

     :param experiment_name: Name of the experiment to look up.
     :return: The matching :py:class:`mlflow.entities.Experiment`, or ``None``
              if no experiment with that name exists.
     :raises MlflowException: For any server error other than
              RESOURCE_DOES_NOT_EXIST / ENDPOINT_NOT_FOUND.
     """
     try:
         req_body = message_to_json(GetExperimentByName(experiment_name=experiment_name))
         response_proto = self._call_endpoint(GetExperimentByName, req_body)
         return Experiment.from_proto(response_proto.experiment)
     except MlflowException as e:
         if e.error_code == databricks_pb2.ErrorCode.Name(
             databricks_pb2.RESOURCE_DOES_NOT_EXIST
         ):
             return None
         elif e.error_code == databricks_pb2.ErrorCode.Name(databricks_pb2.ENDPOINT_NOT_FOUND):
             # Older servers lack the GetExperimentByName endpoint; fall back
             # to scanning the full ListExperiments result.
             for experiment in self.list_experiments(ViewType.ALL):
                 if experiment.name == experiment_name:
                     return experiment
             return None
         # Bare `raise` (not `raise e`) re-raises with the original traceback.
         raise
Exemplo n.º 18
0
 def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
     """Read an experiment's metadata from disk; return None on an id mismatch."""
     self._check_root_dir()
     _validate_experiment_id(experiment_id)
     experiment_dir = self._get_experiment_path(experiment_id, view_type)
     if experiment_dir is None:
         raise MlflowException("Could not find experiment with ID %s" % experiment_id,
                               databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
     # Experiments that live under the trash folder are soft-deleted.
     if experiment_dir.startswith(self.trash_folder):
         stage = Experiment.DELETED_LIFECYCLE
     else:
         stage = Experiment.ACTIVE_LIFECYCLE
     meta['lifecycle_stage'] = stage
     experiment = Experiment.from_dictionary(meta)
     # Guard against corrupted meta files whose recorded id diverges from the
     # directory name; such experiments are skipped rather than returned.
     if int(experiment_id) != experiment.experiment_id:
         logging.warning("Experiment ID mismatch for exp %s. ID recorded as '%s' in meta data. "
                         "Experiment will be ignored.",
                         str(experiment_id), str(experiment.experiment_id), exc_info=True)
         return None
     return experiment
Exemplo n.º 19
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """Store a new active experiment record in DynamoDB and return its id."""
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri,
                                           str(experiment_id))
     table_name = "_".join([self.table_prefix, DynamodbStore.EXPERIMENT_TABLE])
     table = self._get_dynamodb_resource().Table(table_name)
     experiment = Experiment(
         experiment_id=experiment_id,
         name=name,
         artifact_location=artifact_uri,
         lifecycle_stage=LifecycleStage.ACTIVE,
     )
     response = table.put_item(
         Item=_entity_to_dict(experiment),
         ReturnConsumedCapacity="TOTAL",
     )
     status_code = response["ResponseMetadata"]["HTTPStatusCode"]
     if status_code != 200:
         raise MlflowException("DynamoDB connection error")
     return experiment_id
Exemplo n.º 20
0
    def test_launch_run_submit(self, *_):
        """Deploy a job and then launch it with ``--as-run-submit``; both
        commands should exit cleanly."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"dbfs": {}, "jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(
                    deploy,
                    ["--environment", "test", "--tags", "cake=cheesecake"])

                self.assertEqual(deploy_result.exit_code, 0)

                # Launch the deployed job via the run-submit API path.
                launch_result = invoke_cli_runner(
                    launch,
                    [
                        "--environment", "test", "--job", "sample", "--tags",
                        "cake=cheesecake", "--as-run-submit"
                    ],
                )

                self.assertEqual(launch_result.exit_code, 0)
Exemplo n.º 21
0
    def test_write_specs_to_file(self, *_):
        """Deploy with ``--write-specs-to-file`` should produce a readable JSON
        spec, and a second deploy should be able to overwrite it."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "default",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            spec_file = ".dbx/deployment-result.json"
            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(deploy, [
                    "--environment", "default", "--write-specs-to-file",
                    spec_file
                ])

                self.assertEqual(deploy_result.exit_code, 0)

                # The written spec must be valid JSON.
                spec_result = json.loads(pathlib.Path(spec_file).read_text())

                self.assertIsNotNone(spec_result)

                # Deploying again must overwrite the existing spec file.
                deploy_overwrite = invoke_cli_runner(deploy, [
                    "--environment", "default", "--write-specs-to-file",
                    spec_file
                ])

                self.assertEqual(deploy_overwrite.exit_code, 0)
Exemplo n.º 22
0
    def test_deploy_incorrect_artifact_location(self, *_):
        """Deploying when the project's configured artifact location diverges
        from the existing MLflow experiment's location should raise an error
        instructing the user to create a new experiment."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            "dbfs:/some/correct-location",
                                            None, None),
            ):
                deployment_content = {"test": {"jobs": []}}
                write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

                # Point the project config at a location that conflicts with
                # the (mocked) experiment's artifact location above.
                sample_config = read_json(INFO_FILE_PATH)
                sample_config["environments"]["test"][
                    "artifact_location"] = "dbfs:/some/another-location"
                write_json(sample_config, INFO_FILE_PATH)

                deploy_result = invoke_cli_runner(deploy,
                                                  ["--environment", "test"],
                                                  expected_error=True)

                self.assertIsInstance(deploy_result.exception, Exception)
                self.assertIn(
                    "Please change the experiment name to create a new experiment",
                    str(deploy_result.exception))
Exemplo n.º 23
0
    def test_deploy_non_existent_env(self, *_):
        """Deploying an environment that is missing from the deployment file
        should fail with a NameError."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            # Deliberately define only an environment name that the deploy
            # below does NOT target.
            deployment_content = {
                "misconfigured-environment": {
                    "dbfs": {},
                    "jobs": []
                }
            }

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                deploy_result = invoke_cli_runner(deploy,
                                                  ["--environment", "test"],
                                                  expected_error=True)

                self.assertIsInstance(deploy_result.exception, NameError)
                self.assertIn("non-existent in the deployment file",
                              str(deploy_result.exception))
Exemplo n.º 24
0
    def test_deploy_listed_jobs(self, *_):
        """Deploying an explicit, comma-separated subset of jobs should exit
        cleanly."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {
                "test": {
                    "jobs": [{
                        "name": "job-1"
                    }, {
                        "name": "job-2"
                    }]
                }
            }

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment("id", None,
                                            f"dbfs:/dbx/{self.project_name}",
                                            None, None),
            ):
                # Both defined jobs are selected via the --jobs filter.
                deploy_result = invoke_cli_runner(
                    deploy, ["--environment", "test", "--jobs", "job-1,job-2"])
                self.assertEqual(deploy_result.exit_code, 0)
Exemplo n.º 25
0
    def test_execute(self, *args):  # noqa
        """``execute`` against a configured project should exit cleanly."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            with patch(
                    "mlflow.get_experiment_by_name",
                    return_value=Experiment(
                        "id", None,
                        f"dbfs:/Shared/dbx/projects/{self.project_name}", None,
                        None),
            ):
                # NOTE(review): configure above used environment "test" but
                # execute targets "default" — confirm this mismatch is intended.
                execute_result = invoke_cli_runner(
                    execute,
                    [
                        "--environment",
                        "default",
                        "--cluster-id",
                        "000-some-cluster-id",
                        "--job",
                        f"{self.project_name}-sample",
                    ],
                )

                self.assertEqual(execute_result.exit_code, 0)
Exemplo n.º 26
0
 def list_experiments(
     self, view_type=ViewType.ACTIVE_ONLY, max_results=None, page_token=None,
 ):
     """
     :param view_type: Qualify requested type of experiments.
     :param max_results: If passed, specifies the maximum number of experiments desired. If not
                         passed, the server will pick a maximum number of results to return.
     :param page_token: Token specifying the next page of results. It should be obtained from
                         a ``list_experiments`` call.
     :return: A :py:class:`PagedList <mlflow.store.entities.PagedList>` of
              :py:class:`Experiment <mlflow.entities.Experiment>` objects. The pagination token
              for the next page can be obtained via the ``token`` attribute of the object.
     """
     request = ListExperiments(
         view_type=view_type, max_results=max_results, page_token=page_token)
     response_proto = self._call_endpoint(ListExperiments, message_to_json(request))
     experiments = list(map(Experiment.from_proto, response_proto.experiments))
     # A missing `next_page_token` field comes back as the proto default
     # (empty string); normalize it to None for callers.
     token = None
     if response_proto.HasField("next_page_token"):
         token = response_proto.next_page_token
     return PagedList(experiments, token)
Exemplo n.º 27
0
class ExecuteTest(DbxTest):
    """Tests for the ``dbx execute`` flow and its cluster helpers, with all
    Databricks and MLflow calls patched out."""

    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch(
        "databricks_cli.clusters.api.ClusterService.get_cluster",
        return_value={"cluster_name": "some-name", "state": "RUNNING"},
    )
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("dbx.utils.common.ApiV1Client.create_context", return_value={"id": 1})
    @patch("dbx.utils.common.ApiV1Client.execute_command", return_value={"id": 1})
    @patch(
        "dbx.utils.common.ApiV1Client.get_command_status",
        return_value={
            "status": "Finished",
            "results": {"resultType": "Ok", "data": "Ok!"},
        },
    )
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    def test_execute(self, *args):  # noqa
        """``execute`` against a running (mocked) cluster should exit cleanly."""
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            # NOTE(review): configure above used environment "test" but
            # execute targets "default" — confirm this mismatch is intended.
            execute_result = invoke_cli_runner(
                execute,
                [
                    "--environment",
                    "default",
                    "--cluster-id",
                    "000-some-cluster-id",
                    "--job",
                    f"{self.project_name}-sample",
                ],
            )

            self.assertEqual(execute_result.exit_code, 0)

    @patch(
        "databricks_cli.clusters.api.ClusterService.list_clusters",
        return_value={
            "clusters": [
                {
                    "cluster_name": "some-cluster-name",
                    "cluster_id": "aaa-111"
                },
                {
                    "cluster_name": "other-cluster-name",
                    "cluster_id": "aaa-bbb-ccc"
                },
                # Two clusters share "duplicated-name" so that lookup by that
                # name is ambiguous and must raise.
                {
                    "cluster_name": "duplicated-name",
                    "cluster_id": "duplicated-1"
                },
                {
                    "cluster_name": "duplicated-name",
                    "cluster_id": "duplicated-2"
                },
            ]},
    )
    @patch(
        "databricks_cli.clusters.api.ClusterService.get_cluster",
        side_effect=lambda cid: "something" if cid in ("aaa-bbb-ccc", "aaa-111") else None
    )
    def test_preprocess_cluster_args(self, *args):  # noqa
        """_preprocess_cluster_args resolves clusters by name or id and raises
        on missing, ambiguous, or unknown clusters."""
        api_client = Mock(ApiClient)

        # Neither name nor id given -> RuntimeError.
        self.assertRaises(RuntimeError, _preprocess_cluster_args, api_client, None, None)

        id_by_name = _preprocess_cluster_args(api_client, "some-cluster-name", None)
        self.assertEqual(id_by_name, "aaa-111")

        id_by_id = _preprocess_cluster_args(api_client, None, "aaa-bbb-ccc")
        self.assertEqual(id_by_id, "aaa-bbb-ccc")

        # Unknown name, duplicated name, and unknown id all raise NameError.
        self.assertRaises(NameError, _preprocess_cluster_args, api_client, "non-existent-cluster-by-name", None)
        self.assertRaises(NameError, _preprocess_cluster_args, api_client, "duplicated-name", None)
        self.assertRaises(NameError, _preprocess_cluster_args, api_client, None, "non-existent-id")

    def test_awake_cluster(self):
        """awake_cluster polls until the cluster reports RUNNING and raises on
        an ERROR state."""
        # normal behavior: TERMINATED -> PENDING -> RUNNING over successive polls
        cluster_service_mock = Mock(ClusterService)
        cluster_service_mock.get_cluster.side_effect = [
            {"state": "TERMINATED"},
            {"state": "PENDING"},
            {"state": "RUNNING"},
            {"state": "RUNNING"},
        ]
        awake_cluster(cluster_service_mock, "aaa-bbb")
        self.assertEqual(cluster_service_mock.get_cluster("aaa-bbb").get("state"), "RUNNING")

        # error behavior: an ERROR state must surface as a RuntimeError
        error_mock = Mock(ClusterService)
        error_mock.get_cluster.return_value = {"state": "ERROR"}
        self.assertRaises(RuntimeError, awake_cluster, error_mock, "aaa-bbb")
0
def test_message_to_json():
    """Round-trip MLflow proto messages through message_to_json / parse_dict.

    Covers three cases: a simple Experiment, a nested RegisteredModel with
    model versions, and a TestMessage exercising every scalar/map/oneof/
    extension field kind.
    """
    # Case 1: a flat Experiment serializes to the expected JSON dict.
    json_out = message_to_json(
        Experiment("123", "name", "arty", "active").to_proto())
    assert json.loads(json_out) == {
        "experiment_id": "123",
        "name": "name",
        "artifact_location": "arty",
        "lifecycle_stage": "active",
    }

    # Case 2: a RegisteredModel with two nested ModelVersions.
    original_proto_message = RegisteredModel(
        name="model_1",
        creation_timestamp=111,
        last_updated_timestamp=222,
        description="Test model",
        latest_versions=[
            ModelVersion(
                name="mv-1",
                version="1",
                creation_timestamp=333,
                last_updated_timestamp=444,
                description="v 1",
                user_id="u1",
                current_stage="Production",
                source="A/B",
                run_id="9245c6ce1e2d475b82af84b0d36b52f4",
                status="READY",
                status_message=None,
            ),
            ModelVersion(
                name="mv-2",
                version="2",
                creation_timestamp=555,
                last_updated_timestamp=666,
                description="v 2",
                user_id="u2",
                current_stage="Staging",
                source="A/C",
                run_id="123",
                status="READY",
                status_message=None,
            ),
        ],
    ).to_proto()
    json_out = message_to_json(original_proto_message)
    json_dict = json.loads(json_out)
    # status_message=None fields are omitted from the JSON output.
    assert json_dict == {
        "name":
        "model_1",
        "creation_timestamp":
        111,
        "last_updated_timestamp":
        222,
        "description":
        "Test model",
        "latest_versions": [
            {
                "name": "mv-1",
                "version": "1",
                "creation_timestamp": 333,
                "last_updated_timestamp": 444,
                "current_stage": "Production",
                "description": "v 1",
                "user_id": "u1",
                "source": "A/B",
                "run_id": "9245c6ce1e2d475b82af84b0d36b52f4",
                "status": "READY",
            },
            {
                "name": "mv-2",
                "version": "2",
                "creation_timestamp": 555,
                "last_updated_timestamp": 666,
                "current_stage": "Staging",
                "description": "v 2",
                "user_id": "u2",
                "source": "A/C",
                "run_id": "123",
                "status": "READY",
            },
        ],
    }
    # Parsing the JSON back must reconstruct an identical proto message.
    new_proto_message = ProtoRegisteredModel()
    parse_dict(json_dict, new_proto_message)
    assert original_proto_message == new_proto_message

    # Case 3: a TestMessage covering every proto field kind, built from text format.
    test_message = ParseTextIntoProto(
        """
        field_int32: 11
        field_int64: 12
        field_uint32: 13
        field_uint64: 14
        field_sint32: 15
        field_sint64: 16
        field_fixed32: 17
        field_fixed64: 18
        field_sfixed32: 19
        field_sfixed64: 20
        field_bool: true
        field_string: "Im a string"
        field_with_default1: 111
        field_repeated_int64: [1, 2, 3]
        field_enum: ENUM_VALUE1
        field_inner_message {
            field_inner_int64: 101
            field_inner_repeated_int64: [102, 103]
        }
        field_inner_message {
            field_inner_int64: 104
            field_inner_repeated_int64: [105, 106]
        }
        oneof1: 207
        [mlflow.ExtensionMessage.field_extended_int64]: 100
        field_map1: [{key: 51 value: "52"}, {key: 53 value: "54"}]
        field_map2: [{key: "61" value: 62}, {key: "63" value: 64}]
        field_map3: [{key: 561 value: 562}, {key: 563 value: 564}]
        field_map4: [{key: 71
                      value: {field_inner_int64: 72
                              field_inner_repeated_int64: [81, 82]
                              field_inner_string: "str1"}},
                     {key: 73
                      value: {field_inner_int64: 74
                              field_inner_repeated_int64: 83
                              field_inner_string: "str2"}}]
    """,
        TestMessage(),
    )
    json_out = message_to_json(test_message)
    json_dict = json.loads(json_out)
    assert json_dict == {
        "field_int32":
        11,
        "field_int64":
        12,
        "field_uint32":
        13,
        "field_uint64":
        14,
        "field_sint32":
        15,
        "field_sint64":
        16,
        "field_fixed32":
        17,
        "field_fixed64":
        18,
        "field_sfixed32":
        19,
        "field_sfixed64":
        20,
        "field_bool":
        True,
        "field_string":
        "Im a string",
        "field_with_default1":
        111,
        "field_repeated_int64": [1, 2, 3],
        "field_enum":
        "ENUM_VALUE1",
        "field_inner_message": [
            {
                "field_inner_int64": 101,
                "field_inner_repeated_int64": [102, 103]
            },
            {
                "field_inner_int64": 104,
                "field_inner_repeated_int64": [105, 106]
            },
        ],
        "oneof1":
        207,
        # JSON doesn't support non-string keys, so the int keys will be converted to strings.
        "field_map1": {
            "51": "52",
            "53": "54"
        },
        "field_map2": {
            "63": 64,
            "61": 62
        },
        "field_map3": {
            "561": 562,
            "563": 564
        },
        "field_map4": {
            "73": {
                "field_inner_int64": 74,
                "field_inner_repeated_int64": [83],
                "field_inner_string": "str2",
            },
            "71": {
                "field_inner_int64": 72,
                "field_inner_repeated_int64": [81, 82],
                "field_inner_string": "str1",
            },
        },
        # Extension field values of int64 type are rendered as strings.
        "[mlflow.ExtensionMessage.field_extended_int64]":
        "100",
    }
    # Round-trip: parsing the JSON dict must rebuild an equal TestMessage.
    new_test_message = TestMessage()
    parse_dict(json_dict, new_test_message)
    assert new_test_message == test_message
Exemplo n.º 29
0
import pytest
import mock

from mlflow.entities import (Experiment, Run, RunInfo, RunData, RunTag, Metric,
                             Param, ExperimentTag, RunStatus, LifecycleStage,
                             ViewType)

# Shared MLflow entity fixtures reused by the tests in this module.

# An ACTIVE experiment with fixed id/name/artifact location and no tags.
experiment = Experiment(experiment_id="1",
                        name="experiment_name",
                        artifact_location="artifact_location",
                        lifecycle_stage=LifecycleStage.ACTIVE,
                        tags=[])
# A minimal RUNNING run: started at t=1 and still open (end_time=None).
run_info = RunInfo(run_uuid="1",
                   run_id="1",
                   experiment_id="experiment_id",
                   user_id="unknown",
                   status=RunStatus.to_string(RunStatus.RUNNING),
                   start_time=1,
                   end_time=None,
                   lifecycle_stage=LifecycleStage.ACTIVE,
                   artifact_uri="artifact_uri")
run_data = RunData(metrics=[], params=[], tags=[])  # empty metrics/params/tags payload
run = Run(run_info=run_info, run_data=run_data)

# Single sample entities of each kind.
metric = Metric(key="metric1", value=1, timestamp=1, step=1)

param = Param(key="param1", value="val1")

tag = RunTag(key="tag1", value="val1")

experiment_tag = ExperimentTag(key="tag1", value="val1")
Exemplo n.º 30
0
class DeployTest(DbxTest):
    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    @patch("databricks_cli.configure.config._get_api_client", return_value=None)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    def test_deploy_basic(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"])

            self.assertEqual(deploy_result.exit_code, 0)

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_non_existent_env(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"misconfigured-environment": {"dbfs": {}, "jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"], expected_error=True)

            self.assertIsInstance(deploy_result.exception, NameError)
            self.assertIn("non-existent in the deployment file", str(deploy_result.exception))

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_listed_jobs(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": [{"name": "job-1"}, {"name": "job-2"}]}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test", "--jobs", "job-1,job-2"])

            self.assertEqual(deploy_result.exit_code, 0)

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_with_requirements(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "test",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": []}}

            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            sample_requirements = "\n".join(["pyspark=3.0.0", "xgboost=0.6.0"])

            pathlib.Path("runtime_requirements.txt").write_text(sample_requirements)

            deploy_result = invoke_cli_runner(
                deploy,
                [
                    "--environment",
                    "test",
                    "--requirements-file",
                    "runtime_requirements.txt",
                ],
            )

            self.assertEqual(deploy_result.exit_code, 0)

    def test_update_job_positive(self):
        """_update_job should complete without raising when the service succeeds."""
        jobs_service = Mock(JobsService)
        _update_job(jobs_service, "aa-bbb-ccc-111", {"name": 1})
        self.assertEqual(0, 0)  # reaching this line means no exception was raised

    def test_update_job_negative(self):
        """_update_job must propagate HTTPError raised by the underlying reset_job call."""
        jobs_service = Mock(JobsService)
        jobs_service.reset_job.side_effect = Mock(side_effect=HTTPError())
        self.assertRaises(HTTPError, _update_job, jobs_service, "aa-bbb-ccc-111", {"name": 1})

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", "dbfs:/Shared/dbx/test", None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_write_specs_to_file(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "default",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            spec_file = ".dbx/deployment-result.json"

            deploy_result = invoke_cli_runner(deploy, ["--environment", "default", "--write-specs-to-file", spec_file])

            self.assertEqual(deploy_result.exit_code, 0)

            spec_result = json.loads(pathlib.Path(spec_file).read_text())

            self.assertIsNotNone(spec_result)

            deploy_overwrite = invoke_cli_runner(
                deploy, ["--environment", "default", "--write-specs-to-file", spec_file]
            )

            self.assertEqual(deploy_overwrite.exit_code, 0)

    @patch("databricks_cli.sdk.api_client.ApiClient.perform_query", return_value=None)
    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", "dbfs:/Shared/dbx/test", None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_with_permissions(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment",
                    "default",
                    "--profile",
                    self.profile_name,
                    "--workspace-dir",
                    ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_file = pathlib.Path(DEFAULT_DEPLOYMENT_FILE_PATH)
            deploy_content = json.loads(deployment_file.read_text())

            sample_job = deploy_content.get("default").get("jobs")[0]

            sample_job["permissions"] = {
                "access_control_list": [
                    {
                        "user_name": "*****@*****.**",
                        "permission_level": "IS_OWNER",
                    },
                    {"group_name": "some-user-group", "permission_level": "CAN_VIEW"},
                ]
            }

            deployment_file.write_text(json.dumps(deploy_content, indent=4))

            deploy_result = invoke_cli_runner(deploy, ["--environment", "default"])

            self.assertEqual(deploy_result.exit_code, 0)