    def test_minimum_cluster_definition(self, monkeypatch):
        """ Some keys must always be present for JupyterHub to work. """

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/minimum.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
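        # Autospec the clients so calls are checked against the real method
        # signatures without reaching GCP.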
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.zone = "test-self1-b"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"

        config_built = spawner._build_cluster_config()

        assert 'project_id' in config_built
        assert 'cluster_name' in config_built

        assert config_built['project_id'] == 'test-project'
        assert config_built['cluster_name'] == 'test-clustername'

        assert config_built['config']['gce_cluster_config']['zone_uri'].split(
            '/')[-1] == 'test-self1-b'

        assert Component['JUPYTER'].value in config_built['config'][
            'software_config']['optional_components']
        assert Component['ANACONDA'].value in config_built['config'][
            'software_config']['optional_components']

        assert 'dataproc:jupyter.hub.args' in config_built['config'][
            'software_config']['properties']
        assert 'dataproc:jupyter.hub.enabled' in config_built['config'][
            'software_config']['properties']
        # assert 'dataproc:jupyter.notebook.gcs.dir' in config_built['config']['software_config']['properties']
        assert 'dataproc:jupyter.hub.env' in config_built['config'][
            'software_config']['properties']

    def test_locations(self, monkeypatch):
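        """ Region-bound URIs must match the zone the user picked in the form,
        and machine types and accelerators from the YAML must be preserved. """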

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/basic_uri.yaml',
                                 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic_uri.yaml',
            'cluster_zone': 'us-east1-d'
        }

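        # The region is the zone without its single-letter suffix,
        # e.g. 'us-east1-d' -> 'us-east1'.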
        user_zone = spawner.user_options['cluster_zone']
        user_region = user_zone[:-2]

        config_built = spawner._build_cluster_config()

        assert config_built['config']['gce_cluster_config'][
            'subnetwork_uri'].split('/')[-3] == user_region
        assert config_built['config']['master_config'][
            'machine_type_uri'] == 'n1-standard-4'
        assert config_built['config']['worker_config'][
            'machine_type_uri'] == 'n1-highmem-16'
        assert config_built['config']['secondary_worker_config'][
            'machine_type_uri'] == 'n1-standard-4'
        assert config_built['config']['master_config']['accelerators'][0][
            'accelerator_type_uri'] == 'nvidia-tesla-v100'

    def test_cluster_definition_overrides(self, monkeypatch):
        """Check that config settings incompatible with JupyterHub are overwritten correctly."""

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/export.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'export.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        # Verify that we disable Component Gateway (temporarily)
        assert config_built['config']['endpoint_config'][
            'enable_http_port_access'] is False
        # Verify that we disable preemptibility (temporarily)
        assert 'preemptibility' not in config_built['config']['master_config']
        assert 'preemptibility' not in config_built['config']['worker_config']
        # Verify that we removed cluster-specific namenode properties
        assert 'hdfs:dfs.namenode.lifeline.rpc-address' not in config_built[
            'config']['software_config']['properties']
        assert 'hdfs:dfs.namenode.servicerpc-address' not in config_built[
            'config']['software_config']['properties']

    def test_cluster_definition_keep_core_values(self, monkeypatch):
        """ Some of the system's default values must remain no matter what. """

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/basic.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        assert config_built['project_id'] == 'test-project'
        assert config_built['cluster_name'] == 'test-clustername'

        assert config_built['config']['software_config']['properties'][
            'dataproc:jupyter.hub.args'] == 'test-args-str'
        assert config_built['config']['software_config']['properties'][
            'dataproc:jupyter.hub.enabled'] == 'true'
        # assert config_built['config']['software_config']['properties']['dataproc:jupyter.notebook.gcs.dir'] == f'gs://users-notebooks/fake'
        assert config_built['config']['software_config']['properties'][
            'dataproc:jupyter.hub.env'] == 'test-env-str'

    def test_duration(self, monkeypatch):
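        """ execution_timeout must be normalized to seconds whether the YAML
        gives it as a duration string ('600s') or as a Duration mapping. """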

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/duration.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'duration.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        # Test 600s string
        assert config_built['config']['initialization_actions'][0][
            'execution_timeout']['seconds'] == 600
        # Test Duration protobuf
        assert config_built['config']['initialization_actions'][1][
            'execution_timeout']['seconds'] == 600

    def test_cluster_definition_check_core_fields(self, monkeypatch):
        """ Values chosen by the user through the form overwrite others. If the
        admin wants to prevent that behavior, they should remove form elements.
        TODO(mayran): Check keys so users can not add custom ones. """

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/basic.yaml', 'r').read()
            return config_string

        def test_username(*args, **kwargs):
            return 'foo-user'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "get_username", test_username)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
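        # The cluster name is built from this pattern and the mocked username.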
        spawner.cluster_name_pattern = 'my-cluster-{}'
        spawner.user_options = {
            'cluster_type': 'basic.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        assert config_built['cluster_name'] == 'my-cluster-foo-user'
        assert config_built['project_id'] == 'test-project'

    def test_metadata(self, monkeypatch):
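        """ Metadata from the YAML must be kept and a session-user entry for
        the spawning user must be present. """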

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/basic.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        assert config_built['config']['gce_cluster_config']['metadata'] == {
            'm1': 'v1',
            'm2': 'v2',
            'session-user': MockUser.name
        }

    def test_validate_proto(self, monkeypatch):
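        """ Unknown fields and bad enum values must be stripped from the YAML,
        each with a warning, so the result is a valid Cluster proto. """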

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/unknown_fields.yaml',
                                 'r').read()
            return config_string

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic_uri.yaml',
            'cluster_zone': 'us-east1-d'
        }

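        # _validate_proto cleans the config in place and returns one warning
        # per removed field or enum value.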
        cleaned_config = spawner.get_cluster_definition('')
        warnings = dataprocspawner.spawner._validate_proto(
            cleaned_config, Cluster)

        # Check that we had appropriate warning messages
        assert len(warnings) == 7
        expected_warnings = [
            'Removing unknown/bad value BAD_ENUM_VALUE for field consume_reservation_type.',
            "Removing unknown field unknown_field for class <class 'google.cloud.dataproc_v1beta2.types.clusters.NodeInitializationAction'>",
            'Removing unknown/bad value UNKNOWN_COMPONENT_1 for field optional_components.',
            'Removing unknown/bad value UNKNOWN_COMPONENT_2 for field optional_components.',
            'Removing unknown/bad value UNKNOWN_COMPONENT_3 for field optional_components.',
            "Removing unknown field unknown_field_config_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.ClusterConfig'>",
            "Removing unknown field unknown_field_top_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.Cluster'>",
        ]
        for w in expected_warnings:
            assert w in warnings, f'Expected message {w} in warnings {warnings}'

        raw_config = spawner.get_cluster_definition('')
        # Construct expected output
        del raw_config['unknown_field_top_level']
        del raw_config['config']['unknown_field_config_level']
        del raw_config['config']['initialization_actions'][0]['unknown_field']
        del raw_config['config']['gce_cluster_config']['reservation_affinity'][
            'consume_reservation_type']
        raw_config['config']['software_config']['optional_components'] = [
            'JUPYTER', 'ZEPPELIN', 'ANACONDA', 'PRESTO'
        ]

        # Coerce both outputs to protos so we can compare them for equality.
        # This also sanity-checks that all unknown/bad fields were actually
        # stripped.
        actual_proto = Cluster(cleaned_config)
        expected_proto = Cluster(raw_config)

        assert actual_proto == expected_proto

        # Now check that the config with resolved fields is correct as well
        config_built = spawner._build_cluster_config()

        assert 'unknown_field_top_level' not in config_built
        assert 'unknown_field_config_level' not in config_built['config']
        assert 'unknown_field' not in config_built['config'][
            'initialization_actions'][0]
        assert 'consume_reservation_type' not in config_built['config'][
            'gce_cluster_config']['reservation_affinity']
        # The valid optional components must survive the stripping.
        for component in ('JUPYTER', 'ZEPPELIN', 'ANACONDA', 'PRESTO'):
            assert Component[component].value in config_built['config'][
                'software_config']['optional_components']

    def test_uris(self, monkeypatch):
        """ Test that all official URI patterns work and that the geographic
        locations match. """

        def test_read_file_string(*args, **kwargs):
            config_string = open('./tests/test_data/basic.yaml', 'r').read()
            return config_string

        def test_read_file_uri(*args, **kwargs):
            config_string = open('./tests/test_data/basic_uri.yaml',
                                 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file_string)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        assert config_built['config']['gce_cluster_config'][
            'subnetwork_uri'] == "default"

        # Switch the mocked GCS read to the URI-based config file.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file_uri)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'basic.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        assert config_built['config']['gce_cluster_config'][
            'subnetwork_uri'] == "projects/test-project/regions/us-east1/subnetworks/default"

    def test_camel_case(self, monkeypatch):
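        """ camelCase keys from the YAML must be converted to snake_case, while
        metadata keys and property keys/values keep their original casing. """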

        def test_read_file(*args, **kwargs):
            config_string = open('./tests/test_data/custom.yaml', 'r').read()
            return config_string

        def test_clustername(*args, **kwargs):
            return 'test-clustername'

        fake_creds = AnonymousCredentials()
        mock_dataproc_client = mock.create_autospec(
            ClusterControllerClient(credentials=fake_creds))
        mock_gcs_client = mock.create_autospec(
            storage.Client(credentials=fake_creds, project='project'))
        spawner = DataprocSpawner(hub=Hub(),
                                  dataproc=mock_dataproc_client,
                                  gcs=mock_gcs_client,
                                  user=MockUser(),
                                  _mock=True,
                                  gcs_notebooks=self.gcs_notebooks)

        # Prevents a call to GCS. We return the local file instead.
        monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
        monkeypatch.setattr(spawner, "clustername", test_clustername)

        spawner.project = "test-project"
        spawner.region = "us-east1"
        spawner.zone = "us-east1-d"
        spawner.env_str = "test-env-str"
        spawner.args_str = "test-args-str"
        spawner.user_options = {
            'cluster_type': 'custom.yaml',
            'cluster_zone': 'test-form1-a'
        }

        config_built = spawner._build_cluster_config()

        expected_dict = {
            'project_id': 'test-project',
            'labels': {
                'goog-dataproc-notebook-spawner': 'unknown'
            },
            'cluster_name': 'test-clustername',
            'config': {
                'autoscaling_config': {
                    'policy_uri':
                    'projects/my-project/regions/us-east1/autoscalingPolicies/policy-abc123'
                },
                'config_bucket': 'bucket-dash',
                'endpoint_config': {
                    'enable_http_port_access': True
                },
                'gce_cluster_config': {
                    'metadata': {
                        'KeyCamelCase': 'UlowUlow',
                        'key_with_underscore':
                        'https://downloads.io/protected/files/enterprise-trial.tar.gz',
                        'key_with_underscore_too':
                        'some_UPPER_and_UlowerU:1234',
                        'session-user': MockUser.name
                    },
                    'zone_uri':
                    'https://www.googleapis.com/compute/v1/projects/test-project/zones/test-form1-a'
                },
                'initialization_actions': [],
                'lifecycle_config': {},
                'master_config': {
                    'machine_type_uri': 'machine.1.2_numbers',
                    'min_cpu_platform': 'AUTOMATIC',
                    'disk_config': {
                        'boot_disk_size_gb': 1000
                    },
                },
                'software_config': {
                    'image_version':
                    '1.4-debian9',
                    'optional_components':
                    [Component.JUPYTER.value, Component.ANACONDA.value],
                    'properties': {
                        'dataproc:jupyter.hub.args': 'test-args-str',
                        'dataproc:jupyter.hub.enabled': 'true',
                        'dataproc:jupyter.hub.env': 'test-env-str',
                        'dataproc:jupyter.notebook.gcs.dir':
                        'gs://users-notebooks/fake',
                        'key-with-dash:UPPER_UPPER': '4000',
                        'key-with-dash-too:UlowUlowUlow': '85196m',
                        'key:and.multiple.dots.lowUlowUlow': '13312m'
                    }
                }
            }
        }
        assert expected_dict == config_built