def run_pipeline(self, pipeline):
    """Analyze, execute and display the given pipeline.

    The pipeline is round-tripped through the runner API, snapshotted as a
    proto, and handed to a ``PipelineAnalyzer``. The analyzer's
    ``pipeline_proto_to_execute()`` is rebuilt into a pipeline bound to
    ``self._underlying_runner``, which is run to completion while a
    ``DisplayManager`` is active.

    Args:
        pipeline: The pipeline to run. It must provide ``to_runner_api()``,
            ``runner`` and ``_options`` (presumably a ``beam.Pipeline`` --
            confirm against callers).

    Returns:
        A ``PipelineResult`` wrapping the underlying runner's result, this
        runner, the ``PipelineInfo`` built from the snapshot's components,
        the cache manager and the PCollection-to-id mapping.

    Raises:
        Any exception raised while running or waiting on the pipeline
        propagates to the caller, after the display has been stopped.
    """
    if not hasattr(self, '_desired_cache_labels'):
        self._desired_cache_labels = set()

    # Invoke a round trip through the runner API. This makes sure the Pipeline
    # proto is stable.
    pipeline = beam.pipeline.Pipeline.from_runner_api(
        pipeline.to_runner_api(),
        pipeline.runner,
        pipeline._options)

    # Snapshot the pipeline in a portable proto before mutating it.
    pipeline_proto, original_context = pipeline.to_runner_api(
        return_context=True)
    pcolls_to_pcoll_id = self._pcolls_to_pcoll_id(pipeline, original_context)

    analyzer = pipeline_analyzer.PipelineAnalyzer(
        self._cache_manager,
        pipeline_proto,
        self._underlying_runner,
        pipeline._options,
        self._desired_cache_labels)
    # Should be only accessed for debugging purpose.
    self._analyzer = analyzer

    pipeline_to_execute = beam.pipeline.Pipeline.from_runner_api(
        analyzer.pipeline_proto_to_execute(),
        self._underlying_runner,
        pipeline._options)

    pipeline_info = pipeline_analyzer.PipelineInfo(pipeline_proto.components)

    display = display_manager.DisplayManager(
        pipeline_info=pipeline_info,
        pipeline_proto=pipeline_proto,
        caches_used=analyzer.caches_used(),
        cache_manager=self._cache_manager,
        referenced_pcollections=(
            analyzer.top_level_referenced_pcollection_ids()),
        required_transforms=analyzer.top_level_required_transforms(),
        pipeline_graph_renderer=self._renderer)
    display.start_periodic_update()
    try:
        result = pipeline_to_execute.run()
        result.wait_until_finish()
    finally:
        # Always pair start with stop so a failed run does not leave the
        # periodic update active.
        display.stop_periodic_update()

    return PipelineResult(
        result,
        self,
        pipeline_info,
        self._cache_manager,
        pcolls_to_pcoll_id)
def run_pipeline(self, pipeline, options):
    """Analyze, execute and optionally display the given pipeline.

    The pipeline is round-tripped through the runner API (with
    ``use_fake_coders=True``), snapshotted as a proto, and handed to a
    ``PipelineAnalyzer``. The analyzer's ``pipeline_proto_to_execute()`` is
    rebuilt into a pipeline bound to ``self._underlying_runner``, which is
    run to completion. Unless ``self._skip_display`` is set, a
    ``DisplayManager`` is active for the duration of the run.

    Args:
        pipeline: The pipeline to run. It must provide ``to_runner_api()``
            and ``runner`` (presumably a ``beam.Pipeline`` -- confirm
            against callers).
        options: Options object forwarded to ``Pipeline.from_runner_api``
            and to the ``PipelineAnalyzer``.

    Returns:
        A ``PipelineResult`` wrapping the underlying runner's result, this
        runner, the analyzer's ``pipeline_info()``, the cache manager and
        the PCollection-to-id mapping.

    Raises:
        Any exception raised while running or waiting on the pipeline
        propagates to the caller, after any started display is stopped.
    """
    if not hasattr(self, '_desired_cache_labels'):
        self._desired_cache_labels = set()

    # Invoke a round trip through the runner API. This makes sure the Pipeline
    # proto is stable.
    pipeline = beam.pipeline.Pipeline.from_runner_api(
        pipeline.to_runner_api(use_fake_coders=True),
        pipeline.runner,
        options)

    # Snapshot the pipeline in a portable proto before mutating it.
    pipeline_proto, original_context = pipeline.to_runner_api(
        return_context=True, use_fake_coders=True)
    pcolls_to_pcoll_id = self._pcolls_to_pcoll_id(pipeline, original_context)

    analyzer = pipeline_analyzer.PipelineAnalyzer(
        self._cache_manager,
        pipeline_proto,
        self._underlying_runner,
        options,
        self._desired_cache_labels)
    # Should be only accessed for debugging purpose.
    self._analyzer = analyzer

    pipeline_to_execute = beam.pipeline.Pipeline.from_runner_api(
        analyzer.pipeline_proto_to_execute(),
        self._underlying_runner,
        options)

    # Remember which display (if any) was started, so the stop decision does
    # not depend on re-reading self._skip_display after the run.
    display = None
    if not self._skip_display:
        display = display_manager.DisplayManager(
            pipeline_proto=pipeline_proto,
            pipeline_analyzer=analyzer,
            cache_manager=self._cache_manager,
            pipeline_graph_renderer=self._renderer)
        display.start_periodic_update()

    try:
        result = pipeline_to_execute.run()
        result.wait_until_finish()
    finally:
        # Always pair start with stop so a failed run does not leave the
        # periodic update active.
        if display is not None:
            display.stop_periodic_update()

    return PipelineResult(
        result,
        self,
        self._analyzer.pipeline_info(),
        self._cache_manager,
        pcolls_to_pcoll_id)