Exemplo n.º 1
0
    def StartWorker(
            self,
            start_worker_request,  # type: beam_fn_api_pb2.StartWorkerRequest
            unused_context):
        # type: (...) -> beam_fn_api_pb2.StartWorkerResponse
        """Start an SDK harness worker for ``start_worker_request``.

        When ``self._use_process`` is set, the worker is launched as a
        subprocess (the configured container executable if there is one,
        otherwise an inline ``python -c`` SdkHarness bootstrap) and the
        process is recorded in ``self._worker_processes`` under the request's
        worker id. Otherwise an in-process SdkHarness is run on a daemon
        thread.

        Args:
          start_worker_request: request carrying the worker id and the
            endpoint URLs used to configure the worker.
          unused_context: ignored.

        Returns:
          An empty StartWorkerResponse on success, or one whose ``error``
          field holds ``str(exception)`` if starting the worker raised.
        """
        try:
            if self._use_process:
                if self._container_executable:
                    # Command as per container spec. The executable is
                    # responsible for handling concurrency for artifact
                    # retrieval and other side effects.
                    command = [
                        self._container_executable,
                        '--id=%s' % start_worker_request.worker_id,
                        '--logging_endpoint=%s' %
                        start_worker_request.logging_endpoint.url,
                        '--artifact_endpoint=%s' %
                        start_worker_request.artifact_endpoint.url,
                        '--provision_endpoint=%s' %
                        start_worker_request.provision_endpoint.url,
                        '--control_endpoint=%s' %
                        start_worker_request.control_endpoint.url,
                    ]
                else:
                    # The adjacent string literals are concatenated into one
                    # Python expression, so every keyword argument except the
                    # last must end with a comma or the generated script is
                    # a syntax error.
                    command = [
                        'python', '-c',
                        'from apache_beam.runners.worker.sdk_worker '
                        'import SdkHarness; '
                        'SdkHarness('
                        '"%s",'
                        'worker_id="%s",'
                        'state_cache_size=%d,'
                        'data_buffer_time_limit_ms=%d'
                        ')'
                        '.run()' %
                        (start_worker_request.control_endpoint.url,
                         start_worker_request.worker_id,
                         self._state_cache_size,
                         self._data_buffer_time_limit_ms)
                    ]

                _LOGGER.warning("Starting worker with command %s", command)
                worker_process = subprocess.Popen(command,
                                                  stdout=subprocess.PIPE,
                                                  close_fds=True)
                self._worker_processes[
                    start_worker_request.worker_id] = worker_process
            else:
                worker = sdk_worker.SdkHarness(
                    start_worker_request.control_endpoint.url,
                    worker_id=start_worker_request.worker_id,
                    state_cache_size=self._state_cache_size,
                    data_buffer_time_limit_ms=self._data_buffer_time_limit_ms)
                worker_thread = threading.Thread(
                    name='run_worker_%s' % start_worker_request.worker_id,
                    target=worker.run)
                # Daemon thread: it must not keep the interpreter alive.
                worker_thread.daemon = True
                worker_thread.start()

            return beam_fn_api_pb2.StartWorkerResponse()
        except Exception as exn:
            # Failures are reported to the caller through the response's
            # error field rather than raised.
            return beam_fn_api_pb2.StartWorkerResponse(error=str(exn))
Exemplo n.º 2
0
        def __init__(self):
            """Bring up control and data gRPC servers and start a worker.

            Both servers bind to an ephemeral port on ``[::]``. An SdkHarness
            connected to the control port over an insecure localhost channel
            is then run on a new thread.
            """
            self.state_handler = FnApiRunner.SimpleState()

            # Create both servers and reserve their ports before registering
            # any servicers.
            control_executor = futures.ThreadPoolExecutor(max_workers=10)
            self.control_server = grpc.server(control_executor)
            self.control_port = self.control_server.add_insecure_port('[::]:0')

            data_executor = futures.ThreadPoolExecutor(max_workers=10)
            self.data_server = grpc.server(data_executor)
            self.data_port = self.data_server.add_insecure_port('[::]:0')

            # Control servicer.
            control_handler = streaming_rpc_handler(
                beam_fn_api_pb2.BeamFnControlServicer, 'Control')
            self.control_handler = control_handler
            beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
                control_handler, self.control_server)

            # Data-plane servicer.
            data_channel = data_plane.GrpcServerDataChannel()
            self.data_plane_handler = data_channel
            beam_fn_api_pb2.add_BeamFnDataServicer_to_server(
                data_channel, self.data_server)

            logging.info(
                'starting control server on port %s', self.control_port)
            logging.info('starting data server on port %s', self.data_port)
            self.data_server.start()
            self.control_server.start()

            # The worker talks to the control server started above.
            control_channel = grpc.insecure_channel(
                'localhost:%s' % self.control_port)
            self.worker = sdk_worker.SdkHarness(control_channel)
            self.worker_thread = threading.Thread(target=self.worker.run)
            logging.info('starting worker')
            self.worker_thread.start()
Exemplo n.º 3
0
  def _check_fn_registration_multi_request(self, *args):
    """Verify that register requests populate the harness's descriptor cache.

    For every scenario a control server is started that serves the register
    requests, an SdkHarness is run against it, and the harness's
    ``_bundle_processor_cache.fns`` is compared with the descriptors sent.

    Args:
      *args: (request_count, process_bundles_per_request) pairs, one per
        scenario to check.
    """
    for request_count, bundles_per_request in args:
      registered = []
      instruction_requests = []

      for request_index in range(request_count):
        registered.extend(
            self._get_process_bundles(request_index, bundles_per_request))
        # Each request carries every descriptor accumulated so far.
        register_request = beam_fn_api_pb2.RegisterRequest(
            process_bundle_descriptor=registered)
        instruction_requests.append(
            beam_fn_api_pb2.InstructionRequest(
                instruction_id=str(request_index),
                register=register_request))

      controller = BeamFnControlServicer(instruction_requests)

      grpc_server = grpc.server(UnboundedThreadPoolExecutor())
      beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
          controller, grpc_server)
      port = grpc_server.add_insecure_port("[::]:0")
      grpc_server.start()

      harness = sdk_worker.SdkHarness(
          "localhost:%s" % port, state_cache_size=100)
      harness.run()

      expected_fns = {descriptor.id: descriptor for descriptor in registered}
      self.assertEqual(harness._bundle_processor_cache.fns, expected_fns)
Exemplo n.º 4
0
    def test_fn_registration(self):
        """Register four bundle descriptors and check the worker's ``fns``.

        A control server serves a single register request; after the harness
        has run, ``harness.worker.fns`` must map each descriptor id to its
        descriptor.
        """
        descriptors = []
        for ix in range(4):
            transform = beam_runner_api_pb2.PTransform(unique_name=str(ix))
            descriptors.append(
                beam_fn_api_pb2.ProcessBundleDescriptor(
                    id=str(100 + ix), transforms={str(ix): transform}))

        register_request = beam_fn_api_pb2.RegisterRequest(
            process_bundle_descriptor=descriptors)
        controller = BeamFnControlServicer(
            [beam_fn_api_pb2.InstructionRequest(register=register_request)])

        executor = futures.ThreadPoolExecutor(max_workers=10)
        grpc_server = grpc.server(executor)
        beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
            controller, grpc_server)
        port = grpc_server.add_insecure_port("[::]:0")
        grpc_server.start()

        harness = sdk_worker.SdkHarness(
            grpc.insecure_channel("localhost:%s" % port))
        harness.run()

        expected_fns = {descriptor.id: descriptor for descriptor in descriptors}
        self.assertEqual(harness.worker.fns, expected_fns)
Exemplo n.º 5
0
    def test_fn_registration(self):
        """Register function specs via descriptors and check the worker's ``fns``.

        Four FunctionSpecs are each wrapped in a ProcessBundleDescriptor and
        sent in one register request; after the harness has run,
        ``harness.worker.fns`` must contain every spec and every descriptor,
        keyed by id.
        """
        function_specs = []
        for ix in range(4):
            function_specs.append(beam_fn_api_pb2.FunctionSpec(id=str(ix)))

        descriptors = []
        for ix, spec in enumerate(function_specs):
            primitive = beam_fn_api_pb2.PrimitiveTransform(function_spec=spec)
            descriptors.append(
                beam_fn_api_pb2.ProcessBundleDescriptor(
                    id=str(100 + ix), primitive_transform=[primitive]))

        register_request = beam_fn_api_pb2.RegisterRequest(
            process_bundle_descriptor=descriptors)
        controller = BeamFnControlServicer(
            [beam_fn_api_pb2.InstructionRequest(register=register_request)])

        executor = futures.ThreadPoolExecutor(max_workers=10)
        grpc_server = grpc.server(executor)
        beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
            controller, grpc_server)
        port = grpc_server.add_insecure_port("[::]:0")
        grpc_server.start()

        harness = sdk_worker.SdkHarness(
            grpc.insecure_channel("localhost:%s" % port))
        harness.run()

        expected_fns = {
            entry.id: entry for entry in function_specs + descriptors
        }
        self.assertEqual(harness.worker.fns, expected_fns)
Exemplo n.º 6
0
  def NotifyRunnerAvailable(self, start_worker_request, context):
    """Start an SDK harness worker for ``start_worker_request``.

    When ``self._use_process`` is set, the worker is launched as a
    ``python -c`` subprocess that is killed at interpreter exit; otherwise an
    in-process SdkHarness is run on a daemon thread.

    Args:
      start_worker_request: request carrying the worker id and the control
        endpoint URL.
      context: not used by this method.

    Returns:
      An empty NotifyRunnerAvailableResponse on success, or one whose
      ``error`` field holds ``str(exception)`` if starting the worker raised.
    """
    try:
      if self._use_process:
        command = ['python', '-c',
                   'from apache_beam.runners.worker.sdk_worker '
                   'import SdkHarness; '
                   'SdkHarness("%s",worker_count=%d,worker_id="%s").run()' % (
                       start_worker_request.control_endpoint.url,
                       self._worker_threads,
                       start_worker_request.worker_id)]
        # logging.warn is a deprecated alias (removed in Python 3.13); use
        # logging.warning with lazily formatted arguments instead.
        logging.warning("Starting worker with command %s", command)
        worker_process = subprocess.Popen(command, stdout=subprocess.PIPE)

        # Register to kill the subprocess on exit.
        atexit.register(worker_process.kill)
      else:
        worker = sdk_worker.SdkHarness(
            start_worker_request.control_endpoint.url,
            worker_count=self._worker_threads,
            worker_id=start_worker_request.worker_id)
        worker_thread = threading.Thread(
            name='run_worker_%s' % start_worker_request.worker_id,
            target=worker.run)
        # Daemon thread: it must not keep the interpreter alive.
        worker_thread.daemon = True
        worker_thread.start()

      return beam_fn_api_pb2.NotifyRunnerAvailableResponse()
    except Exception as exn:
      # Failures are reported through the response's error field rather than
      # raised.
      return beam_fn_api_pb2.NotifyRunnerAvailableResponse(
          error=str(exn))
Exemplo n.º 7
0
 def start_worker(self):
     # type: () -> None
     """Create the SdkHarness and run it on a daemon thread.

     The harness is stored on ``self.worker`` and the thread on
     ``self.worker_thread``.
     """
     harness_options = dict(
         state_cache_size=self._state_cache_size,
         data_buffer_time_limit_ms=self._data_buffer_time_limit_ms,
         worker_id=self.worker_id)
     self.worker = sdk_worker.SdkHarness(
         self.control_address, **harness_options)

     self.worker_thread = runner = threading.Thread(
         name='run_worker', target=self.worker.run)
     runner.daemon = True
     runner.start()
Exemplo n.º 8
0
    def get_responses(self, instruction_requests):
        """Run the requests through an SdkHarness and return the responses.

        A control server serving ``instruction_requests`` is started on an
        ephemeral port, a harness is run against it, and the servicer's
        ``responses`` attribute is returned (described by the original
        docstring as ``{id: InstructionResponse}``).
        """
        controller = BeamFnControlServicer(instruction_requests)

        executor = thread_pool_executor.shared_unbounded_instance()
        grpc_server = grpc.server(executor)
        beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
            controller, grpc_server)
        port = grpc_server.add_insecure_port("[::]:0")
        grpc_server.start()

        control_address = "localhost:%s" % port
        sdk_worker.SdkHarness(control_address, state_cache_size=100).run()
        return controller.responses
Exemplo n.º 9
0
 def NotifyRunnerAvailable(self, start_worker_request, context):
     """Run an in-process SdkHarness for the request on a daemon thread.

     Returns an empty NotifyRunnerAvailableResponse on success, or one whose
     ``error`` field holds ``str(exception)`` if starting the worker raised.
     """
     try:
         worker_id = start_worker_request.worker_id
         harness = sdk_worker.SdkHarness(
             start_worker_request.control_endpoint.url,
             worker_count=self._worker_threads,
             worker_id=worker_id)
         runner = threading.Thread(
             name='run_worker_%s' % worker_id, target=harness.run)
         runner.daemon = True
         runner.start()
     except Exception as exn:
         return beam_fn_api_pb2.NotifyRunnerAvailableResponse(
             error=str(exn))
     else:
         return beam_fn_api_pb2.NotifyRunnerAvailableResponse()
Exemplo n.º 10
0
        def __init__(self, sdk_harness_factory=None):
            """Bring up control, data and state gRPC services and a worker.

            Args:
              sdk_harness_factory: optional callable that takes the control
                address (``'localhost:<port>'``) and returns the worker to
                run. When falsy, ``sdk_worker.SdkHarness`` is created with
                ``worker_count=1``.
            """
            self.sdk_harness_factory = sdk_harness_factory

            # Create both servers and reserve their ports before registering
            # any servicers.
            control_executor = futures.ThreadPoolExecutor(max_workers=10)
            self.control_server = grpc.server(control_executor)
            self.control_port = self.control_server.add_insecure_port('[::]:0')

            data_executor = futures.ThreadPoolExecutor(max_workers=10)
            self.data_server = grpc.server(data_executor)
            self.data_port = self.data_server.add_insecure_port('[::]:0')

            # Control servicer.
            control_handler = streaming_rpc_handler(
                beam_fn_api_pb2_grpc.BeamFnControlServicer, 'Control')
            self.control_handler = control_handler
            beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
                control_handler, self.control_server)

            # Data-plane servicer.
            data_channel = data_plane.GrpcServerDataChannel()
            self.data_plane_handler = data_channel
            beam_fn_api_pb2_grpc.add_BeamFnDataServicer_to_server(
                data_channel, self.data_server)

            # TODO(robertwb): Is sharing the control channel fine?  Alternatively,
            # how should this be plumbed?
            state_servicer = FnApiRunner.GrpcStateServicer()
            self.state_handler = state_servicer
            beam_fn_api_pb2_grpc.add_BeamFnStateServicer_to_server(
                state_servicer, self.control_server)

            logging.info(
                'starting control server on port %s', self.control_port)
            logging.info('starting data server on port %s', self.data_port)
            self.data_server.start()
            self.control_server.start()

            # Prefer the injected factory; otherwise use the default harness.
            control_address = 'localhost:%s' % self.control_port
            if self.sdk_harness_factory:
                self.worker = self.sdk_harness_factory(control_address)
            else:
                self.worker = sdk_worker.SdkHarness(
                    control_address, worker_count=1)

            self.worker_thread = threading.Thread(
                name='run_worker', target=self.worker.run)
            logging.info('starting worker')
            self.worker_thread.start()
Exemplo n.º 11
0
 def start_worker(self):
   """Create the SdkHarness and run it on a new thread.

   The harness is stored on ``self.worker`` and the thread on
   ``self.worker_thread``.
   """
   self.worker = sdk_worker.SdkHarness(
       self.control_address,
       worker_count=self._num_threads)
   self.worker_thread = runner = threading.Thread(
       target=self.worker.run, name='run_worker')
   runner.start()