def test_nodes_number(self):
    """A single-node cluster (num_workers=0) must reject preemptible workers."""
    with self.assertRaises(AssertionError) as err:
        ClusterGenerator(
            num_workers=0, num_preemptible_workers=0, project_id=GCP_PROJECT, cluster_name=CLUSTER_NAME
        )
    # BUG FIX: str(err) formats the _AssertRaisesContext object itself
    # (e.g. "<unittest.case._AssertRaisesContext object at 0x...>"), so the
    # substring check could never match. The raised exception lives on
    # err.exception.
    self.assertIn("num_workers == 0 means single", str(err.exception))
def test_nodes_number(self):
    """A single-node cluster (num_workers=0) must reject preemptible workers."""
    with pytest.raises(AssertionError) as exc_info:
        ClusterGenerator(
            num_workers=0,
            num_preemptible_workers=0,
            project_id=GCP_PROJECT,
            cluster_name=CLUSTER_NAME,
        )
    # pytest exposes the raised exception on .value
    error_message = str(exc_info.value)
    assert "num_workers == 0 means single" in error_message
def test_image_version(self):
    """custom_image and image_version are mutually exclusive and must raise ValueError."""
    with self.assertRaises(ValueError) as err:
        ClusterGenerator(
            custom_image="custom_image",
            image_version="image_version",
            project_id=GCP_PROJECT,
            cluster_name=CLUSTER_NAME,
        )
    # BUG FIX: str(err) stringifies the context-manager object, not the
    # exception, so the message check silently tested the wrong string.
    # The raised ValueError is available as err.exception.
    self.assertIn("custom_image and image_version", str(err.exception))
def test_image_version(self):
    """custom_image and image_version are mutually exclusive and must raise ValueError."""
    with pytest.raises(ValueError) as exc_info:
        ClusterGenerator(
            custom_image="custom_image",
            image_version="image_version",
            project_id=GCP_PROJECT,
            cluster_name=CLUSTER_NAME,
        )
    # pytest exposes the raised exception on .value
    error_message = str(exc_info.value)
    assert "custom_image and image_version" in error_message
def test_custom_image_family_error_with_custom_image(self):
    """custom_image and custom_image_family are mutually exclusive and must raise ValueError."""
    with pytest.raises(ValueError) as exc_info:
        ClusterGenerator(
            custom_image="custom_image",
            custom_image_family="custom_image_family",
            project_id=GCP_PROJECT,
            cluster_name=CLUSTER_NAME,
        )
    # pytest exposes the raised exception on .value
    error_message = str(exc_info.value)
    assert "custom_image and custom_image_family" in error_message
def test_build(self):
    """A fully-specified ClusterGenerator should build exactly the expected CLUSTER dict."""
    # Every supported option is exercised so make() covers the full config surface.
    generator_options = dict(
        project_id="project_id",
        cluster_name="cluster_name",
        num_workers=2,
        zone="zone",
        network_uri="network_uri",
        subnetwork_uri="subnetwork_uri",
        internal_ip_only=True,
        tags=["tags"],
        storage_bucket="storage_bucket",
        init_actions_uris=["init_actions_uris"],
        init_action_timeout="10m",
        metadata={"metadata": "data"},
        custom_image="custom_image",
        custom_image_project_id="custom_image_project_id",
        autoscaling_policy="autoscaling_policy",
        properties={"properties": "data"},
        optional_components=["optional_components"],
        num_masters=2,
        master_machine_type="master_machine_type",
        master_disk_type="master_disk_type",
        master_disk_size=128,
        worker_machine_type="worker_machine_type",
        worker_disk_type="worker_disk_type",
        worker_disk_size=256,
        num_preemptible_workers=4,
        labels={"labels": "data"},
        region="region",
        service_account="service_account",
        service_account_scopes=["service_account_scopes"],
        idle_delete_ttl=60,
        auto_delete_time=datetime(2019, 9, 12),
        auto_delete_ttl=250,
        customer_managed_key="customer_managed_key",
    )
    built_cluster = ClusterGenerator(**generator_options).make()
    # Compare against the module-level CLUSTER fixture.
    self.assertDictEqual(CLUSTER, built_cluster)
def test_nodes_number(self):
    """A single-node cluster (num_workers=0) must reject preemptible workers."""
    with self.assertRaises(AssertionError) as err:
        ClusterGenerator(num_workers=0, num_preemptible_workers=0)
    # BUG FIX: str(err) formats the _AssertRaisesContext object, not the
    # raised exception, so the substring check could never match. Use
    # err.exception to inspect the actual AssertionError message.
    self.assertIn("num_workers == 0 means single", str(err.exception))
def test_image_version(self):
    """custom_image and image_version are mutually exclusive and must raise ValueError."""
    with self.assertRaises(ValueError) as err:
        ClusterGenerator(custom_image="custom_image", image_version="image_version")
    # BUG FIX: str(err) stringifies the context-manager object, not the
    # exception; the raised ValueError is available as err.exception.
    self.assertIn("custom_image and image_version", str(err.exception))
},
},
}
# [END how_to_cloud_dataproc_create_cluster]

# Cluster definition: Generating Cluster Config for DataprocClusterCreateOperator
# [START how_to_cloud_dataproc_create_cluster_generate_cluster_config]
# Initialization action that pip-installs extra Python packages on every node.
path = "gs://goog-dataproc-initialization-actions-us-central1/python/pip-install.sh"

# ClusterGenerator.make() turns keyword options into the nested cluster-config
# dict the operator expects, so the dict above does not have to be written by hand.
CLUSTER_CONFIG = ClusterGenerator(
    project_id="test",
    zone="us-central1-a",
    master_machine_type="n1-standard-4",
    worker_machine_type="n1-standard-4",
    num_workers=2,
    storage_bucket="test",
    init_actions_uris=[path],
    # Consumed by pip-install.sh on the cluster nodes.
    metadata={'PIP_PACKAGES': 'pyyaml requests pandas openpyxl'},
).make()

# NOTE(review): DataprocClusterCreateOperator is the older operator name —
# presumably kept here for a legacy example; verify against the file's imports.
create_cluster_operator = DataprocClusterCreateOperator(
    task_id='create_dataproc_cluster',
    cluster_name="test",
    project_id="test",
    region="us-central1",
    cluster_config=CLUSTER_CONFIG,
)
# [END how_to_cloud_dataproc_create_cluster_generate_cluster_config]
},
},
}
# [END how_to_cloud_dataproc_create_cluster]

# Cluster definition: Generating Cluster Config for DataprocCreateClusterOperator
# [START how_to_cloud_dataproc_create_cluster_generate_cluster_config]
# Initialization action that pip-installs extra Python packages on every node.
path = "gs://goog-dataproc-initialization-actions-us-central1/python/pip-install.sh"

# ClusterGenerator.make() turns keyword options into the nested cluster-config
# dict the operator expects, so the dict above does not have to be written by hand.
CLUSTER_GENERATOR_CONFIG = ClusterGenerator(
    project_id="test",
    zone="us-central1-a",
    master_machine_type="n1-standard-4",
    worker_machine_type="n1-standard-4",
    num_workers=2,
    storage_bucket="test",
    init_actions_uris=[path],
    # Consumed by pip-install.sh on the cluster nodes.
    metadata={'PIP_PACKAGES': 'pyyaml requests pandas openpyxl'},
).make()

create_cluster_operator = DataprocCreateClusterOperator(
    task_id='create_dataproc_cluster',
    cluster_name="test",
    project_id="test",
    region="us-central1",
    cluster_config=CLUSTER_GENERATOR_CONFIG,
)
# [END how_to_cloud_dataproc_create_cluster_generate_cluster_config]