def test_configure_cli_insecure():
    """Configuring with --token --insecure persists host, token, and the insecure flag."""
    runner = CliRunner()
    prompts = '\n'.join([TEST_HOST, TEST_TOKEN]) + '\n'
    runner.invoke(cli.configure_cli, ['--token', '--insecure'], input=prompts)
    cfg = get_config()
    assert cfg.host == TEST_HOST
    assert cfg.token == TEST_TOKEN
    # The flag is stored as the string 'True', not a bool.
    assert cfg.insecure == 'True'
def test_configure_cli():
    """Default (username/password) configure flow persists all three credentials."""
    runner = CliRunner()
    # Host, username, password, and the password confirmation prompt.
    prompts = '\n'.join([TEST_HOST, TEST_USER, TEST_PASSWORD, TEST_PASSWORD]) + '\n'
    runner.invoke(cli.configure_cli, input=prompts)
    cfg = get_config()
    assert cfg.host == TEST_HOST
    assert cfg.username == TEST_USER
    assert cfg.password == TEST_PASSWORD
def test_configure_cli_jobs_api_version_password():
    """--jobs-api-version is persisted alongside password-based credentials."""
    runner = CliRunner()
    prompts = '\n'.join([TEST_HOST, TEST_USER, TEST_PASSWORD, TEST_PASSWORD]) + '\n'
    runner.invoke(cli.configure_cli, ['--jobs-api-version', '2.1'], input=prompts)
    cfg = get_config()
    assert cfg.jobs_api_version == '2.1'
    assert cfg.host == TEST_HOST
    assert cfg.username == TEST_USER
    assert cfg.password == TEST_PASSWORD
def test_configure_two_sections():
    """Writing a named profile must not clobber the default section, and vice versa."""
    runner = CliRunner()
    runner.invoke(cli.configure_cli, ['--token'],
                  input='{}\n{}\n'.format(TEST_HOST, TEST_TOKEN))
    runner.invoke(cli.configure_cli, ['--token', '--profile', TEST_PROFILE],
                  input='{}\n{}\n'.format(TEST_HOST_2, TEST_TOKEN))
    default_cfg = get_config()
    assert default_cfg.host == TEST_HOST
    assert default_cfg.token == TEST_TOKEN
    profile_cfg = ProfileConfigProvider(TEST_PROFILE).get_config()
    assert profile_cfg.host == TEST_HOST_2
    assert profile_cfg.token == TEST_TOKEN
def test_configure_cli_jobs_api_version_file():
    """--token-file reads the token from disk instead of prompting for it."""
    with tempfile.NamedTemporaryFile() as token_file:
        token_file.write(TEST_TOKEN.encode('utf-8'))
        token_file.seek(0)  # rewind so configure_cli reads the token back
        runner = CliRunner()
        runner.invoke(
            cli.configure_cli,
            ['--jobs-api-version', '2.1', '--token-file', token_file.name],
            input=TEST_HOST + '\n')
        cfg = get_config()
        assert cfg.jobs_api_version == '2.1'
        assert cfg.token == TEST_TOKEN
def test_get_config_uses_task_context_variable():
    """When a Spark task context is present, config is read from its local properties."""

    class TaskContextMock(object):
        """Minimal stand-in exposing only getLocalProperty."""

        # The only properties the provider is expected to request.
        _PROPS = {
            "spark.databricks.api.url": "url",
            "spark.databricks.token": "token",
            "spark.databricks.ignoreTls": "True",
        }

        def getLocalProperty(self, x):  # NOQA
            if x in self._PROPS:
                return self._PROPS[x]
            raise Exception("should not get here.")

    ctx_class = (
        "databricks_cli.configure.provider.SparkTaskContextConfigProvider."
        "_get_spark_task_context_or_none")
    with patch(ctx_class) as get_context_mock:
        get_context_mock.return_value = TaskContextMock()
        config = get_config()
        assert config.host == "url"
        assert config.token == "token"
        assert config.insecure == "True"
        assert config.username is None
        assert config.password is None
def decorator(*args, **kwargs):
    """Resolve Databricks credentials for the current click invocation, refresh an
    expired OAuth token when possible, and inject an ``api_client`` kwarg before
    delegating to the wrapped command ``function``.

    :raises InvalidConfigurationError: if no valid configuration can be resolved.
    """
    ctx = click.get_current_context()
    # Unique per-invocation name, e.g. "clusters-list-<uuid>", used to tag the client.
    command_name = "-".join(ctx.command_path.split(" ")[1:])
    command_name += "-" + str(uuid.uuid1())
    profile = get_profile_from_context()
    if profile:
        # If we request a specific profile, only get credentials from there.
        config = ProfileConfigProvider(profile).get_config()
    else:
        # If unspecified, use the default provider, or allow for user overrides.
        config = get_config()
    if not config or not config.is_valid:
        raise InvalidConfigurationError.for_profile(profile)
    # This checks if an OAuth access token has expired and will attempt to refresh it if
    # a refresh token is present
    if config.host and config.token and config.refresh_token:
        config.token, config.refresh_token, updated = \
            check_and_refresh_access_token(config.host, config.token, config.refresh_token)
        if updated:
            # Persist the rotated tokens so later invocations reuse them.
            update_and_persist_config(profile, config)
    kwargs['api_client'] = _get_api_client(config, command_name)
    return function(*args, **kwargs)
def get_databricks_host_creds(profile=None):
    """
    Reads in configuration necessary to make HTTP requests to a Databricks server.

    This uses the Databricks CLI's ConfigProvider interface to load the DatabricksConfig object.
    This method will throw an exception if sufficient auth cannot be found.

    :param profile: Databricks CLI profile. If not provided, we will read the default profile.
    :return: :py:class:`mlflow.rest_utils.MlflowHostCreds` which includes the hostname and
        authentication information necessary to talk to the Databricks server.
    """
    if not hasattr(provider, 'get_config'):
        # databricks-cli < 0.8.0 only exposes the legacy per-profile accessor.
        eprint(
            "Warning: support for databricks-cli<0.8.0 is deprecated and will be removed"
            " in a future version.")
        config = provider.get_config_for_profile(profile)
    elif profile:
        config = provider.ProfileConfigProvider(profile).get_config()
    else:
        config = provider.get_config()
    if not config or not config.host:
        _fail_malformed_databricks_auth(profile)
    # Older DatabricksConfig objects may not define 'insecure' at all.
    insecure = hasattr(config, 'insecure') and config.insecure
    if config.username is not None and config.password is not None:
        # Basic auth takes precedence when both username and password are set.
        return MlflowHostCreds(config.host, username=config.username, password=config.password,
                               ignore_tls_verification=insecure)
    elif config.token:
        return MlflowHostCreds(config.host, token=config.token,
                               ignore_tls_verification=insecure)
    _fail_malformed_databricks_auth(profile)
def test_configure_cli_jobs_api_version_aad_token():
    """--aad-token pulls the token from the environment; API version is persisted."""
    with patch.dict('os.environ', {ENV_AAD_TOKEN: 'token'}):
        runner = CliRunner()
        runner.invoke(cli.configure_cli,
                      ['--jobs-api-version', '2.1', '--aad-token'],
                      input=TEST_HOST + '\n')
        assert get_config().jobs_api_version == '2.1'
def test_get_config_uses_default_profile():
    """get_config falls back to the DEFAULT section of the persisted config file."""
    persisted = DatabricksConfig.from_token("hosty", "hello")
    update_and_persist_config(DEFAULT_SECTION, persisted)
    loaded = get_config()
    assert loaded.is_valid_with_token
    assert loaded.host == "hosty"
    assert loaded.token == "hello"
def test_get_config_uses_env_variable():
    """DATABRICKS_* environment variables feed host/username/password into config."""
    env = {
        'DATABRICKS_HOST': TEST_HOST,
        'DATABRICKS_USERNAME': TEST_USER,
        'DATABRICKS_PASSWORD': TEST_PASSWORD,
    }
    with patch.dict('os.environ', env):
        loaded = get_config()
        assert loaded.host == TEST_HOST
        assert loaded.username == TEST_USER
        assert loaded.password == TEST_PASSWORD
def get_databricks_host_creds(server_uri=None):
    """
    Reads in configuration necessary to make HTTP requests to a Databricks server. This
    uses the Databricks CLI's ConfigProvider interface to load the DatabricksConfig object.
    If no Databricks CLI profile is found corresponding to the server URI, this function
    will attempt to retrieve these credentials from the Databricks Secret Manager. For that to work,
    the server URI will need to be of the following format: "databricks://scope:prefix". In the
    Databricks Secret Manager, we will query for a secret in the scope "<scope>" for secrets with
    keys of the form "<prefix>-host" and "<prefix>-token". Note that this prefix *cannot* be empty
    if trying to authenticate with this method. If found, those host credentials will be used. This
    method will throw an exception if sufficient auth cannot be found.

    :param server_uri: A URI that specifies the Databricks profile you want to use for making
        requests.
    :return: :py:class:`mlflow.rest_utils.MlflowHostCreds` which includes the hostname and
        authentication information necessary to talk to the Databricks server.
    """
    profile, path = get_db_info_from_uri(server_uri)
    if not hasattr(provider, "get_config"):
        # databricks-cli < 0.8.0 only exposes the legacy per-profile accessor.
        _logger.warning(
            "Support for databricks-cli<0.8.0 is deprecated and will be removed"
            " in a future version.")
        config = provider.get_config_for_profile(profile)
    elif profile:
        config = provider.ProfileConfigProvider(profile).get_config()
    else:
        config = provider.get_config()
    # if a path is specified, that implies a Databricks tracking URI of the form:
    # databricks://profile-name/path-specifier
    if (not config or not config.host) and path:
        dbutils = _get_dbutils()
        if dbutils:
            # Prefix differentiates users and is provided as path information in the URI
            key_prefix = path
            host = dbutils.secrets.get(scope=profile, key=key_prefix + "-host")
            token = dbutils.secrets.get(scope=profile, key=key_prefix + "-token")
            if host and token:
                config = provider.DatabricksConfig.from_token(host=host, token=token,
                                                              insecure=False)
    if not config or not config.host:
        _fail_malformed_databricks_auth(profile)
    # Older DatabricksConfig objects may not define 'insecure' at all.
    insecure = hasattr(config, "insecure") and config.insecure
    if config.username is not None and config.password is not None:
        # Basic auth takes precedence when both username and password are set.
        return MlflowHostCreds(
            config.host,
            username=config.username,
            password=config.password,
            ignore_tls_verification=insecure,
        )
    elif config.token:
        return MlflowHostCreds(config.host, token=config.token, ignore_tls_verification=insecure)
    _fail_malformed_databricks_auth(profile)
def _get_local_and_remote_folders(alt_remote_folder=None):
    """Return ``(local_notebooks_dir, remote_workspace_dir)`` for the current repo.

    The remote folder defaults to ``/Users/<username>/<repo_name>`` unless an
    explicit alternative is supplied.
    """
    repo_path, repo_name = _get_repo_path_and_name()
    local = '{}/notebooks'.format(repo_path)
    # Read the username eagerly, matching the original behavior even when an
    # alternative remote folder is given.
    username = get_config().username
    remote = (alt_remote_folder if alt_remote_folder is not None
              else '/Users/{}/{}'.format(username, repo_name))
    return local, remote
def test_get_config_uses_path_env_variable(tmpdir):
    """DATABRICKS_CONFIG_FILE redirects both writes and reads of the config file."""
    cfg_file = tmpdir.join("some-cfg-path").strpath
    with patch.dict('os.environ', {'DATABRICKS_CONFIG_FILE': cfg_file}):
        update_and_persist_config(
            DEFAULT_SECTION, DatabricksConfig.from_token("hosty", "hello"))
        loaded = get_config()
    # Everything landed in the overridden path, not the default location.
    assert os.path.exists(cfg_file)
    assert not os.path.exists(_get_path())
    assert loaded.is_valid_with_token
    assert loaded.host == "hosty"
    assert loaded.token == "hello"
def test_get_config_override_profile():
    """set_config_provider with a ProfileConfigProvider overrides the default lookup."""
    update_and_persist_config(TEST_PROFILE, DatabricksConfig.from_token("yo", "lo"))
    try:
        set_config_provider(ProfileConfigProvider(TEST_PROFILE))
        loaded = get_config()
        assert loaded.host == "yo"
        assert loaded.token == "lo"
    finally:
        # Always clear the override so other tests see default behavior.
        set_config_provider(None)
def test_get_config_override_custom():
    """A user-supplied DatabricksConfigProvider takes precedence over everything else."""

    class TestConfigProvider(DatabricksConfigProvider):
        def get_config(self):
            return DatabricksConfig.from_token("Override", "Token!")

    try:
        set_config_provider(TestConfigProvider())
        loaded = get_config()
        assert loaded.host == "Override"
        assert loaded.token == "Token!"
    finally:
        # Always clear the override so other tests see default behavior.
        set_config_provider(None)
def get_databricks_http_request_kwargs_or_fail(profile=None):
    """
    Reads in configuration necessary to make HTTP requests to a Databricks server.

    This uses the Databricks CLI's ConfigProvider interface to load the DatabricksConfig object.
    This method will throw an exception if sufficient auth cannot be found.

    :param profile: Databricks CLI profile. If not provided, we will read the default profile.
    :return: Dictionary with parameters that can be passed to http_request(). This will
             at least include the hostname and headers sufficient to authenticate to Databricks.
    """
    if not hasattr(provider, 'get_config'):
        # databricks-cli < 0.8.0 only exposes the legacy per-profile accessor.
        eprint(
            "Warning: support for databricks-cli<0.8.0 is deprecated and will be removed"
            " in a future version.")
        config = provider.get_config_for_profile(profile)
    elif profile:
        config = provider.ProfileConfigProvider(profile).get_config()
    else:
        config = provider.get_config()
    hostname = config.host
    if not hostname:
        _fail_malformed_databricks_auth(profile)
    auth_str = None
    if config.username is not None and config.password is not None:
        # Basic auth: base64("username:password"), preferred when both are present.
        basic_auth_str = ("%s:%s" % (config.username, config.password)).encode("utf-8")
        auth_str = "Basic " + base64.standard_b64encode(basic_auth_str).decode(
            "utf-8")
    elif config.token:
        auth_str = "Bearer %s" % config.token
    else:
        _fail_malformed_databricks_auth(profile)
    headers = {
        "Authorization": auth_str,
    }
    # TLS verification is disabled only when the config explicitly says so.
    verify = True
    if hasattr(config, 'insecure') and config.insecure:
        verify = False
    return {
        'hostname': hostname,
        'headers': headers,
        'verify': verify,
    }
def decorator(*args, **kwargs):
    """Resolve Databricks credentials for the current click invocation and inject an
    ``api_client`` kwarg before delegating to the wrapped command ``function``.

    :raises InvalidConfigurationError: if no valid configuration can be resolved.
    """
    ctx = click.get_current_context()
    # Unique per-invocation name, e.g. "clusters-list-<uuid>", used to tag the client.
    command_name = "-".join(ctx.command_path.split(" ")[1:])
    command_name += "-" + str(uuid.uuid1())
    profile = get_profile_from_context()
    if profile:
        # If we request a specific profile, only get credentials from there.
        config = ProfileConfigProvider(profile).get_config()
    else:
        # If unspecified, use the default provider, or allow for user overrides.
        config = get_config()
    if not config or not config.is_valid:
        raise InvalidConfigurationError.for_profile(profile)
    kwargs['api_client'] = _get_api_client(config, command_name)
    return function(*args, **kwargs)
def get_credentials(profile):
    """Return ``(host, token)`` from the Databricks CLI config.

    Uses the default config when *profile* is None, otherwise the named profile.
    """
    if profile is None:
        cfg = provider.get_config()
    else:
        cfg = provider.get_config_for_profile(profile)
    return (cfg.host, cfg.token)
def test_configure_cli_jobs_api_version():
    """--jobs-api-version is persisted alongside token-based configuration."""
    runner = CliRunner()
    runner.invoke(cli.configure_cli, ['--jobs-api-version', '2.1', '--token'],
                  input='{}\n{}\n'.format(TEST_HOST, TEST_TOKEN))
    assert get_config().jobs_api_version == '2.1'
def get_host_token(profile=None):
    """Return ``(host, token)`` from the Databricks CLI config.

    Uses the default config when *profile* is None, otherwise the named profile.
    """
    if profile is None:
        cfg = provider.get_config()
    else:
        cfg = provider.get_config_for_profile(profile)
    return (cfg.host, cfg.token)
def test_get_config_throw_exception_if_profile_absent():
    """With no config file on disk, get_config raises InvalidConfigurationError."""
    # Precondition: the default config file must not exist for this test.
    assert not os.path.exists(_get_path())
    with pytest.raises(InvalidConfigurationError):
        get_config()
def test_get_config_throw_exception_if_profile_invalid():
    """A persisted config lacking host and token is rejected by get_config."""
    update_and_persist_config(DEFAULT_SECTION, DatabricksConfig.from_token(None, None))
    with pytest.raises(InvalidConfigurationError):
        get_config()
def get_credentials(profile):
    """Return ``(host, token)`` from the Databricks CLI config.

    Uses the default config when *profile* is None, otherwise the named profile.
    """
    # Imported lazily so the module loads even without databricks-cli installed.
    from databricks_cli.configure import provider
    if profile is None:
        cfg = provider.get_config()
    else:
        cfg = provider.get_config_for_profile(profile)
    return (cfg.host, cfg.token)
def get_host_token(profile=None):
    """Get the host and token for a profile from ~/.databrickscfg."""
    if profile is None:
        cfg = provider.get_config()
    else:
        cfg = provider.get_config_for_profile(profile)
    return (cfg.host, cfg.token)
from databricks_cli.configure.provider import get_config
from pathlib import Path

# COMMAND ----------

dbfs_home_path = Path("dbfs:/home/{}/".format(user))
run_metadata_delta_path = str(
    dbfs_home_path / "genomics/data/delta/pipeline_runs_info_hail_glow.delta")

# COMMAND ----------

# Identify the cluster this notebook is currently attached to.
cluster_id = dbutils.notebook.entry_point.getDbutils().notebook().getContext() \
    .tags().apply('clusterId')

# COMMAND ----------

cs = ClusterService(_get_api_client(get_config()))
_list = cs.list_clusters()['clusters']
# Keep only scalar (str/int) fields so rows fit a flat Spark schema.
# NOTE: type(...) in (str, int) deliberately excludes bools and nested values.
conv = lambda x: {c: v for c, v in x.items() if type(v) in (str, int)}
cluster_info = spark.createDataFrame([conv(entry) for entry in _list])
cluster_info = cluster_info.where(fx.col("cluster_id") == cluster_id)
worker_info = cluster_info.select(
    "node_type_id", "num_workers", "spark_version",
    "creator_user_name").collect()
node_type_id = worker_info[0].node_type_id
n_workers = worker_info[0].num_workers
spark_version = worker_info[0].spark_version
creator_user_name = worker_info[0].creator_user_name

# COMMAND ----------

display(cluster_info)

# COMMAND ----------