Python JobServiceStub示例，apache_beam.portability.api.beam_job_api_pb2_grpc.JobServiceStub Python示例

示例#1

0

显示文件

文件： portable_runner_test.py 项目： yuhonghong7035/beam

 def _start_local_runner_subprocess_job_service(cls):
     """Launch the job-service subprocess and wait until it answers RPCs.

     Calls ``cls._maybe_kill_subprocess()`` first, then starts a new
     subprocess on a freshly picked port and polls it every 0.1 seconds
     with a ``GetState`` request for a fake job id.

     Returns:
       The ``'localhost:<port>'`` address of the started server.

     Raises:
       RuntimeError: if the subprocess exits while we are waiting, or if
         the server is still unreachable after 30 seconds.
     """
     cls._maybe_kill_subprocess()
     # TODO(robertwb): Consider letting the subprocess pick one and
     # communicate it back...
     port = cls._pick_unused_port()
     logging.info('Starting server on port %d.', port)
     cls._subprocess = subprocess.Popen(cls._subprocess_command(port))
     address = 'localhost:%d' % port
     stub = beam_job_api_pb2_grpc.JobServiceStub(
         grpc.insecure_channel(address))
     logging.info('Waiting for server to be ready...')
     began = time.time()
     max_wait_secs = 30
     server_ready = False
     while not server_ready:
         time.sleep(0.1)
         if cls._subprocess.poll() is not None:
             raise RuntimeError(
                 'Subprocess terminated unexpectedly with exit code %d.' %
                 cls._subprocess.returncode)
         if time.time() - began > max_wait_secs:
             raise RuntimeError(
                 'Pipeline timed out waiting for job service subprocess.')
         probe = beam_job_api_pb2.GetJobStateRequest(job_id='[fake]')
         try:
             stub.GetState(probe)
         except grpc.RpcError as exn:
             # Any status other than UNAVAILABLE means the service was
             # reached and answered our fake state request.
             server_ready = exn.code() != grpc.StatusCode.UNAVAILABLE
         else:
             server_ready = True
     logging.info('Server ready.')
     return address

示例#2

0

显示文件

 def start(self):
     """Start the job service subprocess and return a stub connected to it.

     Holds ``self._process_lock`` for the whole start-up. If a process is
     already recorded in ``self._process``, ``self.stop()`` is called first.
     The method then waits for the gRPC channel to become ready, growing
     the per-attempt timeout by 20% after each failed attempt.

     Returns:
       A ``JobServiceStub`` bound to a ready channel at the endpoint
       reported by ``self.subprocess_cmd_and_endpoint()``.

     Raises:
       RuntimeError: if the subprocess exits before the channel is ready.
       Any other exception raised during start-up is logged, ``self.stop()``
       is called, and the exception is re-raised.
     """
     with self._process_lock:
         if self._process:
             self.stop()
         cmd, endpoint = self.subprocess_cmd_and_endpoint()
         logging.debug("Starting job service with %s", cmd)
         try:
             self._process = subprocess.Popen([str(arg) for arg in cmd])
             self._local_temp_root = tempfile.mkdtemp(prefix='beam-temp')
             wait_secs = .1
             channel = grpc.insecure_channel(endpoint)
             channel_ready = grpc.channel_ready_future(channel)
             while True:
                 exit_code = self._process.poll()
                 if exit_code is not None:
                     # Log the command again at error level so that it is
                     # visible next to the failure.
                     logging.error("Starting job service with %s", cmd)
                     raise RuntimeError(
                         'Job service failed to start up with error %s' %
                         exit_code)
                 try:
                     channel_ready.result(timeout=wait_secs)
                     break
                 # grpc.RpcError is the public base class; the private
                 # grpc._channel._Rendezvous is not part of grpc's public
                 # API and is not guaranteed to exist across releases.
                 except (grpc.FutureTimeoutError, grpc.RpcError):
                     wait_secs *= 1.2
                     # Escalate to WARNING once the waits become long.
                     logging.log(
                         logging.WARNING
                         if wait_secs > 1 else logging.DEBUG,
                         'Waiting for jobs grpc channel to be ready at %s.',
                         endpoint)
             return beam_job_api_pb2_grpc.JobServiceStub(channel)
         except:  # pylint: disable=bare-except
             # Deliberately broad: always tear the subprocess down before
             # propagating whatever went wrong.
             logging.exception("Error bringing up job service")
             self.stop()
             raise

示例#3

0

显示文件

文件： universal_local_runner.py 项目： NarasimhaKattunga/kafka-1

  def _get_job_service(self):
    """Return the job service, creating it on first use.

    Creation happens while holding ``self._job_service_lock``. The kind of
    service depends on the runner configuration, checked in this order:
    an explicit runner API address, a local subprocess, an in-process
    servicer reached over gRPC, or a plain in-process servicer.
    """
    with self._job_service_lock:
      if not self._job_service:
        if self._runner_api_address:
          # Connect to an already running job service.
          channel = grpc.insecure_channel(self._runner_api_address)
          service = beam_job_api_pb2_grpc.JobServiceStub(channel)
        elif self._use_subprocesses:
          service = self._start_local_runner_subprocess_job_service()
        elif self._use_grpc:
          # In-process servicer exposed on a local gRPC port.
          self._servicer = JobServicer(use_grpc=True)
          port = self._servicer.start_grpc()
          service = beam_job_api_pb2_grpc.JobServiceStub(
              grpc.insecure_channel('localhost:%d' % port))
        else:
          # Call the servicer directly, without gRPC.
          service = JobServicer(use_grpc=False)
        self._job_service = service

    return self._job_service

示例#4

0

显示文件

文件： portable_runner.py 项目： zhangminglei/beam

    def run_pipeline(self, pipeline):
        """Prepare and run ``pipeline`` on the configured job service.

        The pipeline is converted to its runner API proto, adjusted for
        runner compatibility, sent via ``Prepare``, optionally staged, and
        finally started via ``Run``.

        Args:
          pipeline: the pipeline to run; its options must carry a
            ``job_endpoint``.

        Returns:
          A ``PipelineResult`` for the job service stub and the new job id.

        Raises:
          ValueError: if no ``job_endpoint`` option was provided.
        """
        harness_image = pipeline.options.view_as(
            PortableOptions).harness_docker_image
        docker_image = harness_image or self.default_docker_image()
        job_endpoint = pipeline.options.view_as(PortableOptions).job_endpoint
        if not job_endpoint:
            raise ValueError(
                'job_endpoint should be provided while creating runner.')

        proto_context = pipeline_context.PipelineContext(
            default_environment_url=docker_image)
        proto_pipeline = pipeline.to_runner_api(context=proto_context)
        components = proto_pipeline.components

        if not self.is_embedded_fnapi_runner:
            # Java has different expectations about coders
            # (windowed in Fn API, but *un*windowed in runner API), whereas the
            # embedded FnApiRunner treats them consistently, so we must guard this
            # for now, until FnApiRunner is fixed.
            # See also BEAM-2717.
            for pcoll in components.pcollections.values():
                if pcoll.coder_id in proto_context.coders:
                    continue
                # This is not really a coder id, but a pickled coder.
                unpickled = pickler.loads(pcoll.coder_id)
                coder = coders.registry.get_coder(unpickled)
                pcoll.coder_id = proto_context.coders.get_id(coder)
            proto_context.coders.populate_map(components.coders)

        # Some runners won't detect the GroupByKey transform unless it has no
        # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
        # Iterate over a snapshot because entries are deleted in the loop.
        for _, transform_proto in list(components.transforms.items()):
            if transform_proto.spec.urn != common_urns.primitives.GROUP_BY_KEY.urn:
                continue
            for sub_id in transform_proto.subtransforms:
                del components.transforms[sub_id]
            del transform_proto.subtransforms[:]

        job_service = beam_job_api_pb2_grpc.JobServiceStub(
            grpc.insecure_channel(job_endpoint))
        prepare_request = beam_job_api_pb2.PrepareJobRequest(
            job_name='job', pipeline=proto_pipeline)
        prepare_response = job_service.Prepare(prepare_request)

        # Stage resources only when the service advertises a staging endpoint.
        retrieval_token = None
        staging_url = prepare_response.artifact_staging_endpoint.url
        if staging_url:
            stager = portable_stager.PortableStager(
                grpc.insecure_channel(staging_url),
                prepare_response.staging_session_token)
            retrieval_token, _ = stager.stage_job_resources(
                pipeline._options, staging_location='')

        run_request = beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token)
        run_response = job_service.Run(run_request)
        return PipelineResult(job_service, run_response.job_id)

示例#5

0

显示文件

    def __init__(self, run_command=None):
        """Spawn the Python RPC direct server and connect a stub to it.

        Args:
          run_command: accepted but not used by this constructor.
        """
        # TODO(BEAM-2431): Change this to a docker container from a command.
        server_cmd = [
            'python',
            '-m',
            'apache_beam.runners.experimental.python_rpc_direct.server',
        ]
        self.process = subprocess.Popen(server_cmd)

        self.channel = grpc.insecure_channel('localhost:50051')
        self.service = beam_job_api_pb2_grpc.JobServiceStub(self.channel)

        # Give the co-process a fixed two seconds to finish starting.
        # This wait is only needed for the co-process and should go away
        # once we migrate to docker.
        time.sleep(2)

示例#6

0

显示文件

    def run_pipeline(self, pipeline):
        """Prepare and run ``pipeline`` on the configured job service.

        When the service advertises an artifact staging endpoint, an empty
        manifest is committed to obtain a retrieval token; no files are
        actually staged.

        Args:
          pipeline: the pipeline to run; its options must carry a
            ``job_endpoint``.

        Returns:
          A ``PipelineResult`` for the job service stub and the new job id.

        Raises:
          ValueError: if no ``job_endpoint`` option was provided.
        """
        harness_image = pipeline.options.view_as(
            PortableOptions).harness_docker_image
        docker_image = harness_image or self.default_docker_image()
        job_endpoint = pipeline.options.view_as(PortableOptions).job_endpoint
        if not job_endpoint:
            raise ValueError(
                'job_endpoint should be provided while creating runner.')

        proto_context = pipeline_context.PipelineContext(
            default_environment_url=docker_image)
        proto_pipeline = pipeline.to_runner_api(context=proto_context)
        transforms = proto_pipeline.components.transforms

        # Some runners won't detect the GroupByKey transform unless it has no
        # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
        # Iterate over a snapshot because entries are deleted in the loop.
        for _, transform_proto in list(transforms.items()):
            if transform_proto.spec.urn != common_urns.primitives.GROUP_BY_KEY.urn:
                continue
            for sub_id in transform_proto.subtransforms:
                del transforms[sub_id]
            del transform_proto.subtransforms[:]

        job_service = beam_job_api_pb2_grpc.JobServiceStub(
            grpc.insecure_channel(job_endpoint))
        prepare_request = beam_job_api_pb2.PrepareJobRequest(
            job_name='job', pipeline=proto_pipeline)
        prepare_response = job_service.Prepare(prepare_request)

        retrieval_token = None
        staging_url = prepare_response.artifact_staging_endpoint.url
        if staging_url:
            # Must commit something to get a retrieval token,
            # committing empty manifest for now.
            # TODO(BEAM-3883): Actually stage required files.
            artifact_service = (
                beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
                    grpc.insecure_channel(staging_url)))
            commit_request = beam_artifact_api_pb2.CommitManifestRequest(
                manifest=beam_artifact_api_pb2.Manifest(),
                staging_session_token=prepare_response.staging_session_token)
            commit_response = artifact_service.CommitManifest(commit_request)
            retrieval_token = commit_response.retrieval_token

        run_request = beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token)
        run_response = job_service.Run(run_request)
        return PipelineResult(job_service, run_response.job_id)

示例#7

0

显示文件

 def _start_local_runner_subprocess_job_service(self):
   """Start a universal local runner subprocess and return a stub for it.

   An existing subprocess is killed first. The new subprocess is started
   on a freshly picked port and polled every 0.1 seconds with a
   ``GetState`` request for a fake job id until it responds.

   Returns:
     A ``JobServiceStub`` connected to ``localhost`` on the chosen port.

   Raises:
     RuntimeError: if the subprocess exits while we are waiting, or if
       the server is still unreachable after 30 seconds.
   """
   if self._subprocess:
     # Kill the old one if it exists.
     self._subprocess.kill()
   # TODO(robertwb): Consider letting the subprocess pick one and
   # communicate it back...
   port = _pick_unused_port()
   logging.info("Starting server on port %d.", port)
   self._subprocess = subprocess.Popen([
       sys.executable,
       '-m',
       'apache_beam.runners.portability.universal_local_runner_main',
       '-p',
       str(port),
       '--worker_command_line',
       '%s -m apache_beam.runners.worker.sdk_worker_main' % sys.executable
   ])
   job_service = beam_job_api_pb2_grpc.JobServiceStub(
       grpc.insecure_channel('localhost:%d' % port))
   logging.info("Waiting for server to be ready...")
   start = time.time()
   timeout = 30
   while True:
     time.sleep(0.1)
     if self._subprocess.poll() is not None:
       raise RuntimeError(
           "Subprocess terminated unexpectedly with exit code %d." %
           self._subprocess.returncode)
     elif time.time() - start > timeout:
       raise RuntimeError(
           "Pipeline timed out waiting for job service subprocess.")
     else:
       try:
         job_service.GetState(
             beam_job_api_pb2.GetJobStateRequest(job_id='[fake]'))
         break
       except grpc.RpcError as exn:
         # code is a method and must be called. Comparing the bound method
         # itself with the enum is always unequal, which would end the
         # wait on the first error even while the server is UNAVAILABLE.
         if exn.code() != grpc.StatusCode.UNAVAILABLE:
           # We were able to contact the service for our fake state request.
           break
   logging.info("Server ready.")
   return job_service

示例#8

0

显示文件

文件： portable_runner.py 项目： welshri/beam

  def run_pipeline(self, pipeline):
    """Prepare and run ``pipeline`` on a portable job service.

    If no ``job_endpoint`` option is set, a ``DockerizedJobServer`` is
    started and its endpoint is used. The ``Prepare`` request is retried a
    few times because a channel reported as ready may still fail.

    Args:
      pipeline: the pipeline to run. Its ``sdk_location`` option is changed
        in place from ``'default'`` to ``'container'``.

    Returns:
      A ``PipelineResult`` for the job service stub and the new job id.

    Raises:
      grpc.RpcError: if ``Prepare`` still fails after all retries.
    """
    portable_options = pipeline.options.view_as(PortableOptions)
    job_endpoint = portable_options.job_endpoint

    # TODO: https://issues.apache.org/jira/browse/BEAM-5525
    # portable runner specific default
    if pipeline.options.view_as(SetupOptions).sdk_location == 'default':
      pipeline.options.view_as(SetupOptions).sdk_location = 'container'

    if not job_endpoint:
      docker = DockerizedJobServer()
      job_endpoint = docker.start()

    proto_context = pipeline_context.PipelineContext(
        default_environment=PortableRunner._create_environment(
            portable_options))
    proto_pipeline = pipeline.to_runner_api(context=proto_context)

    # Some runners won't detect the GroupByKey transform unless it has no
    # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
    # Iterate over a snapshot because entries are deleted in the loop.
    for _, transform_proto in list(
        proto_pipeline.components.transforms.items()):
      if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
        for sub_transform in transform_proto.subtransforms:
          del proto_pipeline.components.transforms[sub_transform]
        del transform_proto.subtransforms[:]

    # TODO: Define URNs for options.
    # convert int values: https://issues.apache.org/jira/browse/BEAM-5509
    # The exact type check is kept on purpose: isinstance(v, int) would
    # also stringify booleans.
    options = {'beam:option:' + k + ':v1': (str(v) if type(v) == int else v)
               for k, v in pipeline._options.get_all_options().items()
               if v is not None}

    channel = grpc.insecure_channel(job_endpoint)
    grpc.channel_ready_future(channel).result()
    job_service = beam_job_api_pb2_grpc.JobServiceStub(channel)

    # Sends the PrepareRequest but retries in case the channel is not ready
    def send_prepare_request(max_retries=5):
      num_retries = 0
      while True:
        try:
          # This reports channel is READY but connections may fail
          # Seems to be only an issue on Mac with port forwardings
          grpc.channel_ready_future(channel).result()
          return job_service.Prepare(
              beam_job_api_pb2.PrepareJobRequest(
                  job_name='job', pipeline=proto_pipeline,
                  pipeline_options=job_utils.dict_to_struct(options)))
        # Catch the public grpc.RpcError base class; the private
        # grpc._channel._Rendezvous is not part of grpc's public API and
        # is not guaranteed to exist across releases.
        except grpc.RpcError:
          num_retries += 1
          if num_retries > max_retries:
            raise

    prepare_response = send_prepare_request()
    if prepare_response.artifact_staging_endpoint.url:
      stager = portable_stager.PortableStager(
          grpc.insecure_channel(prepare_response.artifact_staging_endpoint.url),
          prepare_response.staging_session_token)
      retrieval_token, _ = stager.stage_job_resources(
          pipeline._options,
          staging_location='')
    else:
      retrieval_token = None
    run_response = job_service.Run(
        beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token))
    return PipelineResult(job_service, run_response.job_id)

示例#9

0

显示文件

文件： job_server.py 项目： aryansingh12/apache-beam

 def start(self):
   # type: () -> beam_job_api_pb2_grpc.JobServiceStub
   """Connect to ``self._endpoint`` and return a job service stub.

   Blocks until the channel is ready; the wait is bounded by
   ``self._timeout``.
   """
   channel = grpc.insecure_channel(self._endpoint)
   ready_future = grpc.channel_ready_future(channel)
   ready_future.result(timeout=self._timeout)
   return beam_job_api_pb2_grpc.JobServiceStub(channel)

示例#10

0

显示文件

文件： portable_runner.py 项目： sswarnak77/beam

 def _create_job_service(self):
   """Return a job service stub on an insecure channel to the configured address."""
   channel = grpc.insecure_channel(self._job_service_address)
   return beam_job_api_pb2_grpc.JobServiceStub(channel)

示例#11

0

显示文件

    def run_pipeline(self, pipeline, options):
        """Prepare and run ``pipeline`` on a portable job service.

        If no ``job_endpoint`` option is set, a ``DockerizedJobServer`` is
        started and its endpoint is used. For the ``LOOPBACK`` environment
        type an external worker pool server is started and a callback that
        stops it is handed to the result.

        Args:
          pipeline: the pipeline to run; converted with ``to_runner_api``.
          options: pipeline options. ``sdk_location`` and, for ``LOOPBACK``,
            ``environment_config`` are modified in place.

        Returns:
          A ``PipelineResult`` built from the job service stub, the job id,
          the message and state streams, and the cleanup callbacks.

        Raises:
          grpc.RpcError: if ``Prepare`` still fails after all retries.
        """
        portable_options = options.view_as(PortableOptions)
        job_endpoint = portable_options.job_endpoint

        # TODO: https://issues.apache.org/jira/browse/BEAM-5525
        # portable runner specific default
        if options.view_as(SetupOptions).sdk_location == 'default':
            options.view_as(SetupOptions).sdk_location = 'container'

        if not job_endpoint:
            # TODO Provide a way to specify a container Docker URL
            # https://issues.apache.org/jira/browse/BEAM-6328
            docker = DockerizedJobServer()
            job_endpoint = docker.start()

        # This is needed as we start a worker server if one is requested
        # but none is provided.
        if portable_options.environment_type == 'LOOPBACK':
            portable_options.environment_config, server = (
                BeamFnExternalWorkerPoolServicer.start(
                    sdk_worker_main._get_worker_count(options)))
            cleanup_callbacks = [functools.partial(server.stop, 1)]
        else:
            cleanup_callbacks = []

        proto_pipeline = pipeline.to_runner_api(
            default_environment=PortableRunner._create_environment(
                portable_options))

        # Some runners won't detect the GroupByKey transform unless it has no
        # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
        # Iterate over a snapshot because entries are deleted in the loop.
        for _, transform_proto in list(
                proto_pipeline.components.transforms.items()):
            if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
                for sub_transform in transform_proto.subtransforms:
                    del proto_pipeline.components.transforms[sub_transform]
                del transform_proto.subtransforms[:]

        # Preemptively apply combiner lifting, until all runners support it.
        # This optimization is idempotent.
        if not options.view_as(StandardOptions).streaming:
            stages = list(
                fn_api_runner_transforms.leaf_transform_stages(
                    proto_pipeline.root_transform_ids,
                    proto_pipeline.components))
            stages = fn_api_runner_transforms.lift_combiners(
                stages,
                fn_api_runner_transforms.TransformContext(
                    proto_pipeline.components))
            proto_pipeline = fn_api_runner_transforms.with_stages(
                proto_pipeline, stages)

        # TODO: Define URNs for options.
        # convert int values: https://issues.apache.org/jira/browse/BEAM-5509
        # The exact type check is kept on purpose: isinstance(v, int) would
        # also stringify booleans.
        p_options = {
            'beam:option:' + k + ':v1': (str(v) if type(v) == int else v)
            for k, v in options.get_all_options().items() if v is not None
        }

        channel = grpc.insecure_channel(job_endpoint)
        grpc.channel_ready_future(channel).result()
        job_service = beam_job_api_pb2_grpc.JobServiceStub(channel)

        # Sends the PrepareRequest but retries in case the channel is not ready
        def send_prepare_request(max_retries=5):
            num_retries = 0
            while True:
                try:
                    # This reports channel is READY but connections may fail
                    # Seems to be only an issue on Mac with port forwardings
                    grpc.channel_ready_future(channel).result()
                    return job_service.Prepare(
                        beam_job_api_pb2.PrepareJobRequest(
                            job_name='job',
                            pipeline=proto_pipeline,
                            pipeline_options=job_utils.dict_to_struct(
                                p_options)))
                # Catch the public grpc.RpcError base class; the private
                # grpc._channel._Rendezvous is not part of grpc's public
                # API and is not guaranteed to exist across releases.
                except grpc.RpcError:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise

        prepare_response = send_prepare_request()
        if prepare_response.artifact_staging_endpoint.url:
            stager = portable_stager.PortableStager(
                grpc.insecure_channel(
                    prepare_response.artifact_staging_endpoint.url),
                prepare_response.staging_session_token)
            retrieval_token, _ = stager.stage_job_resources(
                options, staging_location='')
        else:
            retrieval_token = None

        # First try to open the streams with the preparation id, before the
        # job is started; fall back to the job id after Run if that fails.
        try:
            state_stream = job_service.GetStateStream(
                beam_job_api_pb2.GetJobStateRequest(
                    job_id=prepare_response.preparation_id))
            # If there's an error, we don't always get it until we try to read.
            # Fortunately, there's always an immediate current state published.
            state_stream = itertools.chain([next(state_stream)], state_stream)
            message_stream = job_service.GetMessageStream(
                beam_job_api_pb2.JobMessagesRequest(
                    job_id=prepare_response.preparation_id))
        except Exception:
            # TODO(BEAM-6442): Unify preparation_id and job_id for all runners.
            state_stream = message_stream = None

        # Run the job and wait for a result.
        run_response = job_service.Run(
            beam_job_api_pb2.RunJobRequest(
                preparation_id=prepare_response.preparation_id,
                retrieval_token=retrieval_token))

        if state_stream is None:
            state_stream = job_service.GetStateStream(
                beam_job_api_pb2.GetJobStateRequest(
                    job_id=run_response.job_id))
            message_stream = job_service.GetMessageStream(
                beam_job_api_pb2.JobMessagesRequest(
                    job_id=run_response.job_id))

        return PipelineResult(job_service, run_response.job_id, message_stream,
                              state_stream, cleanup_callbacks)

示例#12

0

显示文件

文件： portable_runner.py 项目： rohangulati/beam

    def run_pipeline(self, pipeline):
        """Prepare and run ``pipeline`` on a portable job service.

        If no ``job_endpoint`` option is set, a ``DockerizedJobServer`` is
        started and its endpoint is used. The ``Prepare`` request is retried
        a few times because a channel reported as ready may still fail.

        Args:
          pipeline: the pipeline to run; converted with ``to_runner_api``.

        Returns:
          A ``PipelineResult`` for the job service stub and the new job id.

        Raises:
          grpc.RpcError: if ``Prepare`` still fails after all retries.
        """
        portable_options = pipeline.options.view_as(PortableOptions)
        job_endpoint = portable_options.job_endpoint
        if not job_endpoint:
            docker = DockerizedJobServer()
            job_endpoint = docker.start()

        proto_context = pipeline_context.PipelineContext(
            default_environment=PortableRunner._create_environment(
                portable_options))
        proto_pipeline = pipeline.to_runner_api(context=proto_context)

        if not self.is_embedded_fnapi_runner:
            # Java has different expectations about coders
            # (windowed in Fn API, but *un*windowed in runner API), whereas the
            # embedded FnApiRunner treats them consistently, so we must guard this
            # for now, until FnApiRunner is fixed.
            # See also BEAM-2717.
            for pcoll in proto_pipeline.components.pcollections.values():
                if pcoll.coder_id not in proto_context.coders:
                    # This is not really a coder id, but a pickled coder.
                    coder = coders.registry.get_coder(
                        pickler.loads(pcoll.coder_id))
                    pcoll.coder_id = proto_context.coders.get_id(coder)
            proto_context.coders.populate_map(proto_pipeline.components.coders)

        # Some runners won't detect the GroupByKey transform unless it has no
        # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
        # Iterate over a snapshot because entries are deleted in the loop.
        for _, transform_proto in list(
                proto_pipeline.components.transforms.items()):
            if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
                for sub_transform in transform_proto.subtransforms:
                    del proto_pipeline.components.transforms[sub_transform]
                del transform_proto.subtransforms[:]

        # TODO: Define URNs for options.
        # convert int values: https://issues.apache.org/jira/browse/BEAM-5509
        # The exact type check is kept on purpose: isinstance(v, int) would
        # also stringify booleans.
        options = {
            'beam:option:' + k + ':v1': (str(v) if type(v) == int else v)
            for k, v in pipeline._options.get_all_options().items()
            if v is not None
        }

        channel = grpc.insecure_channel(job_endpoint)
        grpc.channel_ready_future(channel).result()
        job_service = beam_job_api_pb2_grpc.JobServiceStub(channel)

        # Sends the PrepareRequest but retries in case the channel is not ready
        def send_prepare_request(max_retries=5):
            num_retries = 0
            while True:
                try:
                    # This reports channel is READY but connections may fail
                    # Seems to be only an issue on Mac with port forwardings
                    grpc.channel_ready_future(channel).result()
                    return job_service.Prepare(
                        beam_job_api_pb2.PrepareJobRequest(
                            job_name='job',
                            pipeline=proto_pipeline,
                            pipeline_options=job_utils.dict_to_struct(
                                options)))
                # Catch the public grpc.RpcError base class; the private
                # grpc._channel._Rendezvous is not part of grpc's public
                # API and is not guaranteed to exist across releases.
                except grpc.RpcError:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise

        prepare_response = send_prepare_request()
        if prepare_response.artifact_staging_endpoint.url:
            stager = portable_stager.PortableStager(
                grpc.insecure_channel(
                    prepare_response.artifact_staging_endpoint.url),
                prepare_response.staging_session_token)
            retrieval_token, _ = stager.stage_job_resources(
                pipeline._options, staging_location='')
        else:
            retrieval_token = None
        run_response = job_service.Run(
            beam_job_api_pb2.RunJobRequest(
                preparation_id=prepare_response.preparation_id,
                retrieval_token=retrieval_token))
        return PipelineResult(job_service, run_response.job_id)

示例#13

0

显示文件

 def start(self):
     """Connect to ``self._endpoint`` and return a job service stub.

     Blocks, without a timeout, until the channel is ready.
     """
     channel = grpc.insecure_channel(self._endpoint)
     ready_future = grpc.channel_ready_future(channel)
     ready_future.result()
     return beam_job_api_pb2_grpc.JobServiceStub(channel)

示例#14

0

显示文件

文件： portable_runner.py 项目： wscheep/beam

  def run_pipeline(self, pipeline, options):
    """Translate ``pipeline``, submit it to a job service and start it.

    The job service is chosen from the ``job_endpoint`` option: an empty
    value starts a ``DockerizedJobServer``, the literal ``'embed'`` uses an
    in-process ``LocalJobServicer``, and any other value is used as the
    address of a gRPC job service. Runner-specific options are fetched
    from the job service and added to the option parser before the job is
    prepared.

    Args:
      pipeline: the pipeline to run; converted with ``to_runner_api``.
      options: pipeline options. ``sdk_location`` and, for the ``LOOPBACK``
        environment type, ``environment_config`` are modified in place.

    Returns:
      A ``PipelineResult`` built from the job service, the job id, the
      message and state streams, and the cleanup callbacks.

    Raises:
      ValueError: if, for a non-streaming pipeline, the ``pre_optimize``
        experiment has an unknown value.
      grpc.RpcError: if describing the pipeline options still fails after
        all retries.
    """
    portable_options = options.view_as(PortableOptions)
    job_endpoint = portable_options.job_endpoint

    # TODO: https://issues.apache.org/jira/browse/BEAM-5525
    # portable runner specific default
    if options.view_as(SetupOptions).sdk_location == 'default':
      options.view_as(SetupOptions).sdk_location = 'container'

    if not job_endpoint:
      # TODO Provide a way to specify a container Docker URL
      # https://issues.apache.org/jira/browse/BEAM-6328
      docker = DockerizedJobServer()
      job_endpoint = docker.start()
      job_service = None
    elif job_endpoint == 'embed':
      job_service = local_job_service.LocalJobServicer()
    else:
      job_service = None

    # This is needed as we start a worker server if one is requested
    # but none is provided.
    if portable_options.environment_type == 'LOOPBACK':
      portable_options.environment_config, server = (
          BeamFnExternalWorkerPoolServicer.start(
              sdk_worker_main._get_worker_count(options)))
      # NOTE(review): this publishes the server as module global 'x' and
      # looks like a debugging leftover; the cleanup callback below already
      # references the server. Confirm nothing reads it, then remove.
      globals()['x'] = server
      cleanup_callbacks = [functools.partial(server.stop, 1)]
    else:
      cleanup_callbacks = []

    proto_pipeline = pipeline.to_runner_api(
        default_environment=PortableRunner._create_environment(
            portable_options))

    # Some runners won't detect the GroupByKey transform unless it has no
    # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
    # Iterate over a snapshot because entries are deleted in the loop.
    for _, transform_proto in list(
        proto_pipeline.components.transforms.items()):
      if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
        for sub_transform in transform_proto.subtransforms:
          del proto_pipeline.components.transforms[sub_transform]
        del transform_proto.subtransforms[:]

    # Preemptively apply combiner lifting, until all runners support it.
    # This optimization is idempotent.
    pre_optimize = options.view_as(DebugOptions).lookup_experiment(
        'pre_optimize', 'combine').lower()
    if not options.view_as(StandardOptions).streaming:
      flink_known_urns = frozenset([
          common_urns.composites.RESHUFFLE.urn,
          common_urns.primitives.IMPULSE.urn,
          common_urns.primitives.FLATTEN.urn,
          common_urns.primitives.GROUP_BY_KEY.urn])
      if pre_optimize == 'combine':
        proto_pipeline = fn_api_runner_transforms.optimize_pipeline(
            proto_pipeline,
            phases=[fn_api_runner_transforms.lift_combiners],
            known_runner_urns=flink_known_urns,
            partial=True)
      elif pre_optimize == 'all':
        proto_pipeline = fn_api_runner_transforms.optimize_pipeline(
            proto_pipeline,
            phases=[fn_api_runner_transforms.annotate_downstream_side_inputs,
                    fn_api_runner_transforms.annotate_stateful_dofns_as_roots,
                    fn_api_runner_transforms.fix_side_input_pcoll_coders,
                    fn_api_runner_transforms.lift_combiners,
                    fn_api_runner_transforms.fix_flatten_coders,
                    # fn_api_runner_transforms.sink_flattens,
                    fn_api_runner_transforms.greedily_fuse,
                    fn_api_runner_transforms.read_to_impulse,
                    fn_api_runner_transforms.extract_impulse_stages,
                    fn_api_runner_transforms.remove_data_plane_ops,
                    fn_api_runner_transforms.sort_stages],
            known_runner_urns=flink_known_urns)
      elif pre_optimize == 'none':
        pass
      else:
        raise ValueError('Unknown value for pre_optimize: %s' % pre_optimize)

    # Only a remote job service needs a channel; the embedded servicer is
    # called directly.
    if not job_service:
      channel = grpc.insecure_channel(job_endpoint)
      grpc.channel_ready_future(channel).result()
      job_service = beam_job_api_pb2_grpc.JobServiceStub(channel)
    else:
      channel = None

    # fetch runner options from job service
    # retries in case the channel is not ready
    def send_options_request(max_retries=5):
      num_retries = 0
      while True:
        try:
          # This reports channel is READY but connections may fail
          # Seems to be only an issue on Mac with port forwardings
          if channel:
            grpc.channel_ready_future(channel).result()
          return job_service.DescribePipelineOptions(
              beam_job_api_pb2.DescribePipelineOptionsRequest())
        # Catch the public grpc.RpcError base class; the private
        # grpc._channel._Rendezvous is not part of grpc's public API and
        # is not guaranteed to exist across releases.
        except grpc.RpcError:
          num_retries += 1
          if num_retries > max_retries:
            raise

    options_response = send_options_request()

    def add_runner_options(parser):
      for option in options_response.options:
        try:
          # no default values - we don't want runner options
          # added unless they were specified by the user
          add_arg_args = {'action' : 'store', 'help' : option.description}
          if option.type == beam_job_api_pb2.PipelineOptionType.BOOLEAN:
            # The flag flips the runner's default: it stores False only
            # when the default is 'true'.
            add_arg_args['action'] = (
                'store_true' if option.default_value != 'true'
                else 'store_false')
          elif option.type == beam_job_api_pb2.PipelineOptionType.INTEGER:
            add_arg_args['type'] = int
          elif option.type == beam_job_api_pb2.PipelineOptionType.ARRAY:
            add_arg_args['action'] = 'append'
          parser.add_argument("--%s" % option.name, **add_arg_args)
        except Exception as e:
          # ignore runner options that are already present
          # only in this case is duplicate not treated as error
          if 'conflicting option string' not in str(e):
            raise
          logging.debug("Runner option '%s' was already added", option.name)

    all_options = options.get_all_options(add_extra_args_fn=add_runner_options)
    # TODO: Define URNs for options.
    # convert int values: https://issues.apache.org/jira/browse/BEAM-5509
    # The exact type check is kept on purpose: isinstance(v, int) would
    # also stringify booleans.
    p_options = {'beam:option:' + k + ':v1': (str(v) if type(v) == int else v)
                 for k, v in all_options.items()
                 if v is not None}

    prepare_response = job_service.Prepare(
        beam_job_api_pb2.PrepareJobRequest(
            job_name='job', pipeline=proto_pipeline,
            pipeline_options=job_utils.dict_to_struct(p_options)))
    if prepare_response.artifact_staging_endpoint.url:
      stager = portable_stager.PortableStager(
          grpc.insecure_channel(prepare_response.artifact_staging_endpoint.url),
          prepare_response.staging_session_token)
      retrieval_token, _ = stager.stage_job_resources(
          options,
          staging_location='')
    else:
      retrieval_token = None

    # First try to open the streams with the preparation id, before the job
    # is started; fall back to the job id after Run if that fails.
    try:
      state_stream = job_service.GetStateStream(
          beam_job_api_pb2.GetJobStateRequest(
              job_id=prepare_response.preparation_id))
      # If there's an error, we don't always get it until we try to read.
      # Fortunately, there's always an immediate current state published.
      state_stream = itertools.chain(
          [next(state_stream)],
          state_stream)
      message_stream = job_service.GetMessageStream(
          beam_job_api_pb2.JobMessagesRequest(
              job_id=prepare_response.preparation_id))
    except Exception:
      # TODO(BEAM-6442): Unify preparation_id and job_id for all runners.
      state_stream = message_stream = None

    # Run the job and wait for a result.
    run_response = job_service.Run(
        beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token))

    if state_stream is None:
      state_stream = job_service.GetStateStream(
          beam_job_api_pb2.GetJobStateRequest(
              job_id=run_response.job_id))
      message_stream = job_service.GetMessageStream(
          beam_job_api_pb2.JobMessagesRequest(
              job_id=run_response.job_id))

    return PipelineResult(job_service, run_response.job_id, message_stream,
                          state_stream, cleanup_callbacks)