def get_cluster_client(self, location: Optional[str] = None) -> ClusterControllerClient:
    """Returns ClusterControllerClient."""
    client_options = {
        'api_endpoint': f'{location}-dataproc.googleapis.com:443'
    } if location else None

    return ClusterControllerClient(
        credentials=self._get_credentials(),
        client_info=self.client_info,
        client_options=client_options,
    )
async def test_progress(self, monkeypatch):
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_logging_client = mock.create_autospec(
        logging_v2.LoggingServiceV2Client(credentials=fake_creds))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        logging=mock_logging_client,
        gcs_notebooks=self.gcs_notebooks)
    spawner.project = "test-progress"

    async def collect(ait):
        items = []
        async for value in ait:
            items.append(value)
        return items

    def create_logs():
        entries = []
        for i in range(5):
            e = LogEntry(
                insert_id=f'entry_{i}',
                json_payload=ParseDict(
                    {'method': 'method', 'message': f'message_{i}'},
                    Struct()))
            entries.append(e)
        return entries

    def create_expected():
        progress = 5
        expected = []
        for i, _ in enumerate(create_logs()):
            progress += math.ceil((90 - progress) / 4)
            expected.append({
                'progress': progress,
                'message': f'method: message_{i}'
            })
        expected.append({'message': 'operation.done()', 'progress': 71})
        return expected

    def test_list_log_entries(*args, **kwargs):
        return create_logs()

    op = MockOperation('op1', 'cluster1-op1')

    monkeypatch.setattr(mock_logging_client, 'list_log_entries',
                        test_list_log_entries)
    monkeypatch.setattr(spawner, 'operation', op)

    _, _ = await spawner.start()
    assert await collect(spawner.progress()) == create_expected()
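# Reference note (added, not part of the original suite): the expected
# progress values above follow directly from the formula in
# create_expected(). Starting at 5, each of the five log entries advances
# progress by math.ceil((90 - progress) / 4), i.e. a quarter of the
# remaining distance to 90, rounded up; the final 'operation.done()' entry
# repeats the last value reached.
def test_progress_curve_reference(self):
    progress = 5
    values = []
    for _ in range(5):
        progress += math.ceil((90 - progress) / 4)
        values.append(progress)
    # 5 -> 27 -> 43 -> 55 -> 64 -> 71; hence the final {'progress': 71}.
    assert values == [27, 43, 55, 64, 71]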
def test_minimum_cluster_definition(self, monkeypatch):
    """Some keys must always be present for JupyterHub to work."""

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/minimum.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.zone = "test-self1-b"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"

    config_built = spawner._build_cluster_config()

    assert 'project_id' in config_built
    assert 'cluster_name' in config_built

    assert config_built['project_id'] == 'test-project'
    assert config_built['cluster_name'] == 'test-clustername'

    assert config_built['config']['gce_cluster_config']['zone_uri'].split(
        '/')[-1] == 'test-self1-b'

    assert Component['JUPYTER'].value in config_built['config'][
        'software_config']['optional_components']
    assert Component['ANACONDA'].value in config_built['config'][
        'software_config']['optional_components']

    assert 'dataproc:jupyter.hub.args' in config_built['config'][
        'software_config']['properties']
    assert 'dataproc:jupyter.hub.enabled' in config_built['config'][
        'software_config']['properties']
    # assert 'dataproc:jupyter.notebook.gcs.dir' in config_built['config'][
    #     'software_config']['properties']
    assert 'dataproc:jupyter.hub.env' in config_built['config'][
        'software_config']['properties']
def test_locations(self, monkeypatch):

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/basic_uri.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic_uri.yaml',
        'cluster_zone': 'us-east1-d'
    }

    user_zone = spawner.user_options['cluster_zone']
    user_region = user_zone[:-2]

    config_built = spawner._build_cluster_config()

    assert config_built['config']['gce_cluster_config'][
        'subnetwork_uri'].split('/')[-3] == user_region
    assert config_built['config']['master_config'][
        'machine_type_uri'] == 'n1-standard-4'
    assert config_built['config']['worker_config'][
        'machine_type_uri'] == 'n1-highmem-16'
    assert config_built['config']['secondary_worker_config'][
        'machine_type_uri'] == 'n1-standard-4'
    assert config_built['config']['master_config']['accelerators'][0][
        'accelerator_type_uri'] == 'nvidia-tesla-v100'
def test_cluster_definition_overrides(self, monkeypatch):
    """Check that config settings incompatible with JupyterHub are
    overwritten correctly."""

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/export.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'export.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    # Verify that we disable Component Gateway (temporarily).
    assert config_built['config']['endpoint_config'][
        'enable_http_port_access'] is False
    # Verify that we disable preemptibility (temporarily).
    assert 'preemptibility' not in config_built['config']['master_config']
    assert 'preemptibility' not in config_built['config']['worker_config']
    # Verify that we removed cluster-specific namenode properties.
    assert 'hdfs:dfs.namenode.lifeline.rpc-address' not in config_built[
        'config']['software_config']['properties']
    assert 'hdfs:dfs.namenode.servicerpc-address' not in config_built[
        'config']['software_config']['properties']
def test_cluster_definition_keep_core_values(self, monkeypatch):
    """Some system default values must remain no matter what."""

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/basic.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    assert config_built['project_id'] == 'test-project'
    assert config_built['cluster_name'] == 'test-clustername'

    assert config_built['config']['software_config']['properties'][
        'dataproc:jupyter.hub.args'] == 'test-args-str'
    assert config_built['config']['software_config']['properties'][
        'dataproc:jupyter.hub.enabled'] == 'true'
    # assert config_built['config']['software_config']['properties'][
    #     'dataproc:jupyter.notebook.gcs.dir'] == f'gs://users-notebooks/fake'
    assert config_built['config']['software_config']['properties'][
        'dataproc:jupyter.hub.env'] == 'test-env-str'
def test_image_version_supports_component_gateway(self):
    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # 1.3.x needs patch level 59 or higher (a bare '1.3' is accepted).
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3-debian9') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3.6-debian9') is False
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3.59-debian9') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3.999-debian9') is True

    # 1.4.x needs patch level 31 or higher.
    assert spawner._validate_image_version_supports_component_gateway(
        '1.4-debian10') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.4.6-debian10') is False
    assert spawner._validate_image_version_supports_component_gateway(
        '1.4.31-debian10') is True

    # 1.5.x needs patch level 5 or higher.
    assert spawner._validate_image_version_supports_component_gateway(
        '1.5-debian10') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.5.0-debian10') is False
    assert spawner._validate_image_version_supports_component_gateway(
        '1.5.5-debian10') is True

    # All 2.x images support Component Gateway.
    assert spawner._validate_image_version_supports_component_gateway(
        '2') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '2.0') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '2.0.0') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '2.3.0') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '2.0.0-RC1-preview') is True

    # Unparseable version strings are assumed to support it.
    assert spawner._validate_image_version_supports_component_gateway(
        'weird-unexpected-version-124.3.v2.2020-02-15') is True
    assert spawner._validate_image_version_supports_component_gateway(
        '1.3.weird-version-again') is True
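# The assertions above pin down per-minor-version patch thresholds:
# Component Gateway needs 1.3.59+, 1.4.31+, or 1.5.5+ on the 1.x line,
# while bare '1.x' versions, all 2.x images, and unparseable version
# strings are accepted. The helper below is a hypothetical
# re-implementation consistent with those assertions; the spawner's actual
# _validate_image_version_supports_component_gateway logic may differ.
import re

_MIN_CG_PATCH = {(1, 3): 59, (1, 4): 31, (1, 5): 5}

def _supports_component_gateway_sketch(image_version: str) -> bool:
    match = re.match(r'^(\d+)(?:\.(\d+))?(?:\.(\d+))?', image_version)
    if not match:
        return True  # unparseable versions are assumed to support it
    major = int(match.group(1))
    minor = int(match.group(2)) if match.group(2) else 0
    patch = int(match.group(3)) if match.group(3) else None
    if major >= 2:
        return True  # every 2.x image supports Component Gateway
    min_patch = _MIN_CG_PATCH.get((major, minor))
    if min_patch is None or patch is None:
        return True  # unknown minor, or a bare '1.x' with no patch level
    return patch >= min_patch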
async def test_poll_no_cluster(self):
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_client.get_cluster.return_value = None

    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    spawner.project = 'test-poll-no-cluster'
    assert spawner.project == 'test-poll-no-cluster'
    assert await spawner.poll() == 1
def test_clean_gcs_path(self, monkeypatch):
    path = "gs://bucket/config/"

    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    assert spawner._clean_gcs_path(path) == "gs://bucket/config"
    assert spawner._clean_gcs_path(path, return_gs=False) == "bucket/config"
    assert spawner._clean_gcs_path(
        path, return_slash=True) == "gs://bucket/config/"
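# Hypothetical helper mirroring the three cases asserted above; the real
# DataprocSpawner._clean_gcs_path may differ in details such as validation.
def _clean_gcs_path_sketch(path: str, return_gs: bool = True,
                           return_slash: bool = False) -> str:
    path = path.rstrip('/')
    if not return_gs and path.startswith('gs://'):
        path = path[len('gs://'):]
    if return_slash:
        path += '/'
    return path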
async def test_domain_scoped_zonal_dns(self):
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    spawner.project = "test:domain-scoped"
    assert spawner.project == "test:domain-scoped"

    (ip, port) = await spawner.start()
    assert ip == f'dataprochub-fake-m.{self.zone}.c.domain-scoped.test.internal'
    assert port == 0
def test_cluster_definition_check_core_fields(self, monkeypatch):
    """Values chosen by the user through the form overwrite others.

    If the admin wants to prevent that behavior, they should remove form
    elements.

    TODO(mayran): Check keys so users can not add custom ones.
    """

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/basic.yaml', 'r').read()

    def test_username(*args, **kwargs):
        return 'foo-user'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "get_username", test_username)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.cluster_name_pattern = 'my-cluster-{}'
    spawner.user_options = {
        'cluster_type': 'basic.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    assert config_built['cluster_name'] == 'my-cluster-foo-user'
    assert config_built['project_id'] == 'test-project'
def test_duration(self, monkeypatch):

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/duration.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'duration.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    # Timeout given as a '600s' string.
    assert config_built['config']['initialization_actions'][0][
        'execution_timeout']['seconds'] == 600
    # Timeout given as a Duration protobuf.
    assert config_built['config']['initialization_actions'][1][
        'execution_timeout']['seconds'] == 600
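# Background for the two assertions above (added; not part of the original
# suite): a Dataproc execution_timeout may be written either as a
# JSON-format duration string such as "600s" or as an explicit Duration
# message, and both normalize to the same protobuf value.
def test_duration_string_equivalence_reference(self):
    from google.protobuf import duration_pb2
    from google.protobuf.json_format import Parse

    # The JSON mapping of the Duration well-known type is a string literal,
    # so '"600s"' parses to seconds=600.
    parsed = Parse('"600s"', duration_pb2.Duration())
    assert parsed.seconds == 600
    assert parsed == duration_pb2.Duration(seconds=600)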
def test_metadata(self, monkeypatch):

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/basic.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    assert config_built['config']['gce_cluster_config']['metadata'] == {
        'm1': 'v1',
        'm2': 'v2',
        'session-user': MockUser.name
    }
async def test_start_normal(self):
    operation = operations_pb2.Operation()

    # Mock the Dataproc API client.
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_client.create_cluster.return_value = operation
    # Force no existing clusters to bypass the check in the spawner.
    mock_client.get_cluster.return_value = None

    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Test that the traitlets work.
    spawner.project = 'test-create'
    assert spawner.project == 'test-create'
    assert spawner.region == self.region

    (ip, port) = await spawner.start()
    assert ip == f'dataprochub-fake-m.{self.zone}.c.{spawner.project}.internal'
    # JupyterHub defaults to 0 if no port set.
    assert port == 0

    mock_client.create_cluster.assert_called_once()

    assert spawner.cluster_definition['cluster_name'] == 'dataprochub-fake'
    assert (spawner.cluster_definition['config']['gce_cluster_config']
            ['zone_uri']) == (
                f'https://www.googleapis.com/compute/v1/projects/'
                f'{spawner.project}/zones/{spawner.zone}')

    env = json.loads(
        spawner.cluster_definition['config']['software_config']
        ['properties']['dataproc:jupyter.hub.env'])
    assert env['JUPYTERHUB_API_URL'] is not None
async def test_start_existing_clustername(self):
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    spawner.project = "test-create-existing"
    assert spawner.project == "test-create-existing"

    (ip, port) = await spawner.start()
    assert ip == f'dataprochub-fake-m.{self.zone}.c.{spawner.project}.internal'
    assert port == 0

    mock_client.create_cluster.assert_not_called()
async def test_poll_create(self):
    expected_response = Cluster(
        **{'status': {'state': ClusterStatus.State.CREATING}})

    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_client.get_cluster.return_value = expected_response

    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    spawner.project = 'test-poll-create'
    assert spawner.project == 'test-poll-create'
    assert await spawner.poll() is None
async def test_stop_normal(self):
    fake_creds = AnonymousCredentials()
    mock_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    spawner.project = 'test-stop'
    assert spawner.project == 'test-stop'
    assert spawner.region == self.region

    await spawner.stop()

    mock_client.delete_cluster.assert_called_once_with(
        project_id='test-stop',
        region=self.region,
        cluster_name='dataprochub-fake')
def get_cluster_client(
        self,
        region: Optional[str] = None,
        location: Optional[str] = None) -> ClusterControllerClient:
    """Returns ClusterControllerClient."""
    if location is not None:
        warnings.warn(
            "Parameter `location` will be deprecated. "
            "Please provide value through `region` parameter instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        region = location

    client_options = None
    if region and region != 'global':
        client_options = {
            'api_endpoint': f'{region}-dataproc.googleapis.com:443'
        }

    return ClusterControllerClient(
        credentials=self._get_credentials(),
        client_info=self.client_info,
        client_options=client_options)
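# Usage sketch for the method above (added; illustrative only). `hook` is a
# hypothetical object exposing get_cluster_client, standing in for whatever
# class defines it:
#
#     import warnings
#
#     with warnings.catch_warnings(record=True) as caught:
#         warnings.simplefilter('always')
#         client = hook.get_cluster_client(location='us-central1')  # legacy
#     assert any(issubclass(w.category, DeprecationWarning) for w in caught)
#
#     # Preferred, warning-free form. Only non-'global' regions get a
#     # regional endpoint; region='global' or None uses the default one.
#     client = hook.get_cluster_client(region='us-central1')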
def test_validate_proto(self, monkeypatch):

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/unknown_fields.yaml', 'r').read()

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic_uri.yaml',
        'cluster_zone': 'us-east1-d'
    }

    cleaned_config = spawner.get_cluster_definition('')
    validation_warnings = dataprocspawner.spawner._validate_proto(
        cleaned_config, Cluster)

    # Check that we had the appropriate warning messages.
    assert len(validation_warnings) == 7
    expected_warnings = [
        'Removing unknown/bad value BAD_ENUM_VALUE for field consume_reservation_type.',
        "Removing unknown field unknown_field for class <class 'google.cloud.dataproc_v1beta2.types.clusters.NodeInitializationAction'>",
        'Removing unknown/bad value UNKNOWN_COMPONENT_1 for field optional_components.',
        'Removing unknown/bad value UNKNOWN_COMPONENT_2 for field optional_components.',
        'Removing unknown/bad value UNKNOWN_COMPONENT_3 for field optional_components.',
        "Removing unknown field unknown_field_config_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.ClusterConfig'>",
        "Removing unknown field unknown_field_top_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.Cluster'>",
    ]
    for w in expected_warnings:
        assert w in validation_warnings, (
            f'Expected message {w} in warnings {validation_warnings}')

    raw_config = spawner.get_cluster_definition('')

    # Construct the expected output.
    del raw_config['unknown_field_top_level']
    del raw_config['config']['unknown_field_config_level']
    del raw_config['config']['initialization_actions'][0]['unknown_field']
    del raw_config['config']['gce_cluster_config']['reservation_affinity'][
        'consume_reservation_type']
    raw_config['config']['software_config']['optional_components'] = [
        'JUPYTER', 'ZEPPELIN', 'ANACONDA', 'PRESTO'
    ]

    # Coerce both outputs to protos so we can easily compare equality. This
    # also sanity-checks that we have actually stripped all unknown/bad
    # fields.
    actual_proto = Cluster(cleaned_config)
    expected_proto = Cluster(raw_config)
    assert actual_proto == expected_proto

    # Now check that the config with resolved fields is correct as well.
    config_built = spawner._build_cluster_config()
    assert 'unknown_field_top_level' not in config_built
    assert 'unknown_field_config_level' not in config_built['config']
    assert 'unknown_field' not in config_built['config'][
        'initialization_actions'][0]
    assert 'consume_reservation_type' not in config_built['config'][
        'gce_cluster_config']['reservation_affinity']
    assert raw_config['config']['software_config'][
        'optional_components'] == ['JUPYTER', 'ZEPPELIN', 'ANACONDA', 'PRESTO']
def test_uris(self, monkeypatch):
    """Tests that all official URI patterns work and that geo locations
    match."""

    def test_read_file_string(*args, **kwargs):
        return open('./tests/test_data/basic.yaml', 'r').read()

    def test_read_file_uri(*args, **kwargs):
        return open('./tests/test_data/basic_uri.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file_string)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()
    # A bare subnetwork name stays untouched.
    assert config_built['config']['gce_cluster_config'][
        'subnetwork_uri'] == "default"

    # Same setup, but the config file spells the subnetwork as a full URI.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file_uri)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'basic.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()
    assert config_built['config']['gce_cluster_config'][
        'subnetwork_uri'] == (
            "projects/test-project/regions/us-east1/subnetworks/default")
def test_camel_case(self, monkeypatch):

    def test_read_file(*args, **kwargs):
        return open('./tests/test_data/custom.yaml', 'r').read()

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local file instead.
    monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.region = "us-east1"
    spawner.zone = "us-east1-d"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.user_options = {
        'cluster_type': 'custom.yaml',
        'cluster_zone': 'test-form1-a'
    }

    config_built = spawner._build_cluster_config()

    expected_dict = {
        'project_id': 'test-project',
        'labels': {
            'goog-dataproc-notebook-spawner': 'unknown'
        },
        'cluster_name': 'test-clustername',
        'config': {
            'autoscaling_config': {
                'policy_uri': 'projects/my-project/regions/us-east1/autoscalingPolicies/policy-abc123'
            },
            'config_bucket': 'bucket-dash',
            'endpoint_config': {
                'enable_http_port_access': True
            },
            'gce_cluster_config': {
                'metadata': {
                    'KeyCamelCase': 'UlowUlow',
                    'key_with_underscore': 'https://downloads.io/protected/files/enterprise-trial.tar.gz',
                    'key_with_underscore_too': 'some_UPPER_and_UlowerU:1234',
                    'session-user': MockUser.name
                },
                'zone_uri': 'https://www.googleapis.com/compute/v1/projects/test-project/zones/test-form1-a'
            },
            'initialization_actions': [],
            'lifecycle_config': {},
            'master_config': {
                'machine_type_uri': 'machine.1.2_numbers',
                'min_cpu_platform': 'AUTOMATIC',
                'disk_config': {
                    'boot_disk_size_gb': 1000
                },
            },
            'software_config': {
                'image_version': '1.4-debian9',
                'optional_components': [
                    Component.JUPYTER.value, Component.ANACONDA.value
                ],
                'properties': {
                    'dataproc:jupyter.hub.args': 'test-args-str',
                    'dataproc:jupyter.hub.enabled': 'true',
                    'dataproc:jupyter.hub.env': 'test-env-str',
                    'dataproc:jupyter.notebook.gcs.dir': 'gs://users-notebooks/fake',
                    'key-with-dash:UPPER_UPPER': '4000',
                    'key-with-dash-too:UlowUlowUlow': '85196m',
                    'key:and.multiple.dots.lowUlowUlow': '13312m'
                }
            }
        }
    }

    assert expected_dict == config_built
def test_config_paths(self, monkeypatch):
    """Checks that configuration paths are found."""
    config_hierarchy = [
        "bucket/listme/file_L1.yaml",
        "bucket/config/file_A1.yaml",
        "bucket/config/file_A2.yaml",
        "bucket/file_B1.yaml",
        "bucket-two/config/two/file_C1.yaml",
    ]

    def test_list_blobs(*args, **kwargs):
        """Rewrites the library function to read a custom list of paths
        instead of real GCS.

        https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/client.html#Client.list_blobs
        """
        bucket_or_name = args[0]
        prefix = kwargs['prefix']
        candidate_path = f'{bucket_or_name}/{prefix}'
        config_paths = []
        for c in config_hierarchy:
            if c.startswith(candidate_path):
                fn = '/'.join(c.split('/')[1:])
                b = Blob(bucket='dummy', name=fn)
                config_paths.append(b)
        return iter(config_paths)

    def test_clustername(*args, **kwargs):
        return 'test-clustername'

    fake_creds = AnonymousCredentials()
    mock_dataproc_client = mock.create_autospec(
        ClusterControllerClient(credentials=fake_creds))
    mock_gcs_client = mock.create_autospec(
        storage.Client(credentials=fake_creds, project='project'))
    spawner = DataprocSpawner(
        hub=Hub(),
        dataproc=mock_dataproc_client,
        gcs=mock_gcs_client,
        user=MockUser(),
        _mock=True,
        gcs_notebooks=self.gcs_notebooks)

    # Prevents a call to GCS. We return the local list instead.
    monkeypatch.setattr(mock_gcs_client, "list_blobs", test_list_blobs)
    monkeypatch.setattr(spawner, "clustername", test_clustername)

    spawner.project = "test-project"
    spawner.zone = "test-self1-b"
    spawner.env_str = "test-env-str"
    spawner.args_str = "test-args-str"
    spawner.dataproc_configs = (
        "gs://bucket/config/,"
        "bucket/config/file_A1.yaml,"
        "bucket/file_B1.yaml,"
        "bucket-notexist/file.yaml,"
        "bucket/file-notexist.yaml,"
        "bucket/listme/,"
        "bucket/config-notexist/file.yaml,"
        "gs://bucket/listme/,bucket/config,bucket-two,")

    read_paths = spawner._list_gcs_files(spawner.dataproc_configs)

    assert isinstance(read_paths, list)
    assert len(read_paths) == len(config_hierarchy)
    assert set(read_paths) == set(config_hierarchy)
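# Hypothetical sketch of the path resolution this test pins down; the real
# DataprocSpawner._list_gcs_files may differ in details. Entries are
# comma-separated, a leading 'gs://' is optional, directory-like entries
# expand to every blob under their prefix, and entries that match nothing
# (or are empty) are dropped.
def _list_gcs_files_sketch(gcs, config_str):
    found = set()
    for entry in filter(None, config_str.split(',')):
        bucket, _, prefix = entry.replace('gs://', '').partition('/')
        for blob in gcs.list_blobs(bucket, prefix=prefix):
            found.add(f'{bucket}/{blob.name}')
    return list(found)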