def test_clusters_cleanup_skip_on_duplicate(self, mock_master_url):
  """Cleanup must skip deleting a cluster still shared by another pipeline.

  Registers two pipelines against the same master_url, then cleans up only
  the first; the shared cluster must not be deleted, and a warning that
  mentions 'skipping deletion' must be logged.
  """
  clusters = ib.Clusters()
  project = 'test-project'
  region = 'test-region'

  p1 = beam.Pipeline(
      options=PipelineOptions(
          project=project,
          region=region,
      ))
  p2 = beam.Pipeline(
      options=PipelineOptions(
          project=project,
          region=region,
      ))

  cluster_metadata_1 = MasterURLIdentifier(project_id=project, region=region)
  clusters.dataproc_cluster_managers[str(
      id(p1))] = DataprocClusterManager(cluster_metadata_1)
  clusters.dataproc_cluster_managers[str(id(p1))].master_url = 'test_url'
  clusters.master_urls_to_pipelines['test_url'].append(str(id(p1)))
  cluster_metadata_2 = MasterURLIdentifier(project_id=project, region=region)
  # BUG FIX: the second manager must be registered under p2; the original
  # keyed it on str(id(p1)), overwriting p1's entry and never registering p2
  # even though str(id(p2)) was appended to master_urls_to_pipelines below.
  clusters.dataproc_cluster_managers[str(
      id(p2))] = DataprocClusterManager(cluster_metadata_2)
  clusters.dataproc_cluster_managers[str(id(p2))].master_url = 'test_url'
  clusters.master_urls_to_pipelines['test_url'].append(str(id(p2)))

  from apache_beam.runners.interactive.interactive_beam import _LOGGER
  with self.assertLogs(_LOGGER, level='WARNING') as context_manager:
    clusters.cleanup(p1)
    self.assertTrue('skipping deletion' in context_manager.output[0])
def test_get_master_url_flink_master_provided(self):
  """No Dataproc cluster is created when the user supplies flink_master."""
  from apache_beam.runners.portability.flink_runner import FlinkRunner
  runner = interactive_runner.InteractiveRunner()
  flink_options = PipelineOptions(
      flink_master='--flink_master=test.internal:1')
  p = beam.Pipeline(
      interactive_runner.InteractiveRunner(underlying_runner=FlinkRunner()),
      options=flink_options)
  runner._get_dataproc_cluster_master_url_if_applicable(p)
  # Nothing should have been registered in the global cluster registry.
  self.assertEqual(ie.current_env().clusters.describe(), {})
  # Reset shared cluster state so later tests start clean.
  ie.current_env().clusters = ib.Clusters()
def test_clusters_cleanup_cluster_manager_not_found(self):
  """cleanup() on a pipeline without a registered manager logs an error."""
  clusters = ib.Clusters()
  pipeline = beam.Pipeline(
      options=PipelineOptions(
          project='test-project',
          region='test-region',
      ))
  from apache_beam.runners.interactive.interactive_beam import _LOGGER
  with self.assertLogs(_LOGGER, level='ERROR') as context_manager:
    clusters.cleanup(pipeline)
    self.assertTrue(
        'No cluster_manager is associated' in context_manager.output[0])
def test_get_master_url_no_flink_master_or_provided_master_url(
    self, mock_create_cluster):
  """A Dataproc cluster is provisioned when no flink_master is supplied."""
  from apache_beam.runners.portability.flink_runner import FlinkRunner
  runner = interactive_runner.InteractiveRunner(
      underlying_runner=FlinkRunner())
  p = beam.Pipeline(
      options=PipelineOptions(
          project='test-project',
          region='test-region',
      ))
  runner._get_dataproc_cluster_master_url_if_applicable(p)
  description = ie.current_env().clusters.describe(p)
  # The provisioned cluster must carry the pipeline's project id.
  self.assertEqual(description['cluster_metadata'].project_id, 'test-project')
  # Reset shared cluster state so later tests start clean.
  ie.current_env().clusters = ib.Clusters()
def test_clusters_describe(self):
  """describe() exposes the metadata of a registered cluster manager."""
  clusters = ib.Clusters()
  project = 'test-project'
  region = 'test-region'
  p = beam.Pipeline(
      options=PipelineOptions(
          project=project,
          region=region,
      ))
  metadata = MasterURLIdentifier(project_id=project, region=region)
  # NOTE(review): this test keys the manager by the pipeline object and reads
  # describe()[None], while sibling tests use str(id(p)) / describe(p) —
  # confirm with Clusters.describe whether this asymmetry is intentional.
  clusters.dataproc_cluster_managers[p] = DataprocClusterManager(metadata)
  self.assertEqual(
      'test-project',
      clusters.describe()[None]['cluster_metadata'].project_id)
def test_get_master_url_no_flink_master_and_master_url_exists(self, m_env):
  """An existing cluster's master_url is reused instead of creating anew."""
  clusters = ib.Clusters()
  m_env().clusters = clusters
  from apache_beam.runners.portability.flink_runner import FlinkRunner
  runner = interactive_runner.InteractiveRunner(
      underlying_runner=FlinkRunner())
  p = beam.Pipeline(
      options=PipelineOptions(
          project='test-project',
          region='test-region',
      ))
  # Pre-populate a known cluster so the runner resolves to it.
  known_metadata = MasterURLIdentifier(
      project_id='test-project',
      region='test-region',
      cluster_name=clusters.default_cluster_name)
  clusters.master_urls['test-url'] = known_metadata
  clusters.master_urls_to_dashboards['test-url'] = 'test-dashboard'
  flink_master = runner._get_dataproc_cluster_master_url_if_applicable(p)
  described = clusters.describe(p)
  self.assertEqual(described['cluster_metadata'].project_id, 'test-project')
  self.assertEqual(flink_master, described['master_url'])
def setUp(self):
  # Patch current_env() for the whole module path so every test in this
  # class sees a mocked interactive environment with a fresh Clusters
  # registry instead of the real global singleton.
  self.patcher = patch(
      'apache_beam.runners.interactive.interactive_environment.current_env')
  # m_env is the started MagicMock; tests call m_env() to reach the env.
  self.m_env = self.patcher.start()
  self.m_env().clusters = ib.Clusters()
  # NOTE(review): no matching self.patcher.stop() is visible in this chunk —
  # confirm a tearDown (or addCleanup) stops the patcher so the patch does
  # not leak into other test classes.