def run_pipeline(self, pipeline, options):
    """Execute *pipeline*, preferring the FnApiRunner when it is usable.

    Walks the pipeline graph looking for features the FnApiRunner cannot
    execute (TestStream, reads from NativeSources, _NativeWrites, CombineFns
    with deferred side inputs, and stateful DoFns with real-time timers).
    If none are found the pipeline runs on the FnApiRunner; otherwise it
    falls back to the BundleBasedDirectRunner.
    """
    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
        """Visitor determining if a Pipeline can be run on the FnApiRunner."""

        def accept(self, pipeline):
            self.supported_by_fnapi_runner = True
            pipeline.visit(self)
            return self.supported_by_fnapi_runner

        def visit_transform(self, applied_ptransform):
            transform = applied_ptransform.transform
            # The FnApiRunner does not support streaming execution.
            if isinstance(transform, TestStream):
                self.supported_by_fnapi_runner = False
            # The FnApiRunner does not support reads from NativeSources.
            if (isinstance(transform, beam.io.Read) and
                    isinstance(transform.source, NativeSource)):
                self.supported_by_fnapi_runner = False
            # The FnApiRunner does not support the use of _NativeWrites.
            if isinstance(transform, _NativeWrite):
                self.supported_by_fnapi_runner = False
            if isinstance(transform, beam.ParDo):
                dofn = transform.dofn
                # The FnApiRunner does not support execution of CombineFns
                # with deferred side inputs.
                if isinstance(dofn, CombineValuesDoFn):
                    args, kwargs = transform.raw_side_inputs
                    if any(
                            isinstance(arg, ArgumentPlaceholder) for arg in
                            itertools.chain(args, kwargs.values())):
                        self.supported_by_fnapi_runner = False
                # Real-time timers in stateful DoFns are not supported.
                if userstate.is_stateful_dofn(dofn):
                    _, timer_specs = userstate.get_dofn_specs(dofn)
                    if any(timer.time_domain == TimeDomain.REAL_TIME
                           for timer in timer_specs):
                        self.supported_by_fnapi_runner = False

    # Check whether all transforms used in the pipeline are supported by the
    # FnApiRunner, and the pipeline was not meant to be run as streaming.
    if _FnApiRunnerSupportVisitor().accept(pipeline):
        from apache_beam.portability.api import beam_provision_api_pb2
        from apache_beam.runners.portability.fn_api_runner import fn_runner
        from apache_beam.runners.portability.portable_runner import (
            JobServiceHandle)
        encoded_options = JobServiceHandle.encode_pipeline_options(
            options.get_all_options())
        provision_info = fn_runner.ExtendedProvisionInfo(
            beam_provision_api_pb2.ProvisionInfo(
                pipeline_options=encoded_options))
        chosen_runner = fn_runner.FnApiRunner(provision_info=provision_info)
    else:
        chosen_runner = BundleBasedDirectRunner()
    return chosen_runner.run_pipeline(pipeline, options)
def _run_job(self):
    """Run the job's pipeline on the FnApiRunner, tracking job state.

    Transitions the job to RUNNING, then to DONE on success. On any
    failure the job is marked FAILED and the exception is re-raised
    after being logged (with its traceback) by ``_LOGGER.exception``.
    """
    self.set_state(beam_job_api_pb2.JobState.RUNNING)
    with JobLogHandler(self._log_queues):
        try:
            result = fn_runner.FnApiRunner(
                provision_info=self._provision_info).run_via_runner_api(
                    self._pipeline_proto)
            _LOGGER.info('Successfully completed job.')
            self.set_state(beam_job_api_pb2.JobState.DONE)
            self.result = result
        except:  # pylint: disable=bare-except
            # _LOGGER.exception already attaches the active traceback.
            # A second call that passed the `traceback` *module* as the log
            # message (printing "<module 'traceback' ...>") was removed.
            _LOGGER.exception('Error running pipeline.')
            self.set_state(beam_job_api_pb2.JobState.FAILED)
            raise
def _run_job(self):
    """Run the pipeline, reporting state transitions and elapsed time.

    On failure, a JOB_MESSAGE_ERROR carrying the full traceback text is
    pushed onto the job's log queues before the state flips to FAILED and
    the exception propagates to the caller.
    """
    self.set_state(beam_job_api_pb2.JobState.RUNNING)
    with JobLogHandler(self._log_queues) as log_handler:
        self._update_dependencies()
        try:
            started_at = time.time()
            result = fn_runner.FnApiRunner(
                provision_info=self._provision_info).run_via_runner_api(
                    self._pipeline_proto)
            elapsed = time.time() - started_at
            _LOGGER.info(
                'Successfully completed job in %s seconds.', elapsed)
            self.set_state(beam_job_api_pb2.JobState.DONE)
            self.result = result
        except:  # pylint: disable=bare-except
            # Surface the failure to job-service clients via the log queues.
            error_message = beam_job_api_pb2.JobMessage(
                message_id=log_handler._next_id(),
                time=time.strftime('%Y-%m-%d %H:%M:%S.'),
                importance=beam_job_api_pb2.JobMessage.JOB_MESSAGE_ERROR,
                message_text=traceback.format_exc())
            self._log_queues.put(error_message)
            _LOGGER.exception('Error running pipeline.')
            self.set_state(beam_job_api_pb2.JobState.FAILED)
            raise
def _invoke_runner(self):
    """Mark the job RUNNING and execute it synchronously on the FnApiRunner.

    Returns whatever ``run_via_runner_api`` produces for this job's
    pipeline proto and pipeline options.
    """
    self.set_state(beam_job_api_pb2.JobState.RUNNING)
    runner = fn_runner.FnApiRunner(provision_info=self._provision_info)
    return runner.run_via_runner_api(
        self._pipeline_proto, self.pipeline_options())