def StartWorker(
        self,
        start_worker_request,  # type: beam_fn_api_pb2.StartWorkerRequest
        unused_context):
    # type: (...) -> beam_fn_api_pb2.StartWorkerResponse
    """Start an SDK harness for the worker described by the request.

    When ``self._use_process`` is set the harness is launched in a
    subprocess -- through ``self._container_executable`` if one is
    configured, otherwise through ``python -c`` -- and the ``Popen`` handle
    is stored in ``self._worker_processes`` under the worker id. Otherwise
    an ``SdkHarness`` is run on a daemon thread in the current process.

    Args:
      start_worker_request: supplies ``worker_id`` and the control, logging,
        artifact and provision endpoint URLs read below.
      unused_context: not used by this method.

    Returns:
      An empty ``StartWorkerResponse`` on success. If anything raises, a
      ``StartWorkerResponse`` whose ``error`` is ``str()`` of the exception.
    """
    try:
        if self._use_process:
            # Every keyword argument must be comma separated: this text is
            # handed verbatim to a fresh interpreter, where a missing comma
            # is a SyntaxError that only shows up in the child process.
            harness_snippet = (
                'from apache_beam.runners.worker.sdk_worker '
                'import SdkHarness; '
                'SdkHarness('
                '"%s",'
                'worker_id="%s",'
                'state_cache_size=%d,'
                'data_buffer_time_limit_ms=%d'
                ')'
                '.run()' % (
                    start_worker_request.control_endpoint.url,
                    start_worker_request.worker_id,
                    self._state_cache_size,
                    self._data_buffer_time_limit_ms))
            command = ['python', '-c', harness_snippet]
            if self._container_executable:
                # command as per container spec
                # the executable is responsible to handle concurrency
                # for artifact retrieval and other side effects
                command = [
                    self._container_executable,
                    '--id=%s' % start_worker_request.worker_id,
                    '--logging_endpoint=%s' %
                    start_worker_request.logging_endpoint.url,
                    '--artifact_endpoint=%s' %
                    start_worker_request.artifact_endpoint.url,
                    '--provision_endpoint=%s' %
                    start_worker_request.provision_endpoint.url,
                    '--control_endpoint=%s' %
                    start_worker_request.control_endpoint.url,
                ]

            _LOGGER.warning("Starting worker with command %s", command)
            # NOTE(review): stdout is piped but not read in this method; if
            # nothing else drains it the child could block once the pipe
            # buffer fills -- confirm against the rest of the class.
            worker_process = subprocess.Popen(
                command, stdout=subprocess.PIPE, close_fds=True)
            self._worker_processes[
                start_worker_request.worker_id] = worker_process
        else:
            worker = sdk_worker.SdkHarness(
                start_worker_request.control_endpoint.url,
                worker_id=start_worker_request.worker_id,
                state_cache_size=self._state_cache_size,
                data_buffer_time_limit_ms=self._data_buffer_time_limit_ms)
            worker_thread = threading.Thread(
                name='run_worker_%s' % start_worker_request.worker_id,
                target=worker.run)
            # Daemon thread: it must not keep the interpreter alive on exit.
            worker_thread.daemon = True
            worker_thread.start()

        return beam_fn_api_pb2.StartWorkerResponse()
    except Exception as exn:
        # RPC boundary: report the failure in the response instead of
        # letting it propagate.
        return beam_fn_api_pb2.StartWorkerResponse(error=str(exn))
def __init__(self):
    """Bring up local control and data gRPC servers and launch a harness.

    Both servers bind to an ephemeral port on ``[::]`` and are started
    before an ``SdkHarness`` connected to the control port is run on
    ``self.worker_thread``.
    """
    self.state_handler = FnApiRunner.SimpleState()

    # Create both servers and reserve their ports before registering
    # any servicer.
    self.control_server = control_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    self.control_port = control_server.add_insecure_port('[::]:0')

    self.data_server = data_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    self.data_port = data_server.add_insecure_port('[::]:0')

    # Control servicer goes on the control server.
    self.control_handler = control_handler = streaming_rpc_handler(
        beam_fn_api_pb2.BeamFnControlServicer, 'Control')
    beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
        control_handler, control_server)

    # Data servicer goes on the data server.
    self.data_plane_handler = data_handler = (
        data_plane.GrpcServerDataChannel())
    beam_fn_api_pb2.add_BeamFnDataServicer_to_server(
        data_handler, data_server)

    logging.info('starting control server on port %s', self.control_port)
    logging.info('starting data server on port %s', self.data_port)
    data_server.start()
    control_server.start()

    # The harness talks to the control server over a local channel.
    control_channel = grpc.insecure_channel(
        'localhost:%s' % self.control_port)
    self.worker = harness = sdk_worker.SdkHarness(control_channel)
    self.worker_thread = runner = threading.Thread(target=harness.run)
    logging.info('starting worker')
    runner.start()
def _check_fn_registration_multi_request(self, *args):
    """Verify that register requests populate the harness's fn cache.

    Args:
      *args: ``(request_count, process_bundles_per_request)`` pairs. For
        each pair a fresh control server and harness are created, and
        ``request_count`` register requests are sent.
    """
    for request_count, bundles_per_request in args:
        instruction_requests = []
        all_descriptors = []

        for request_index in range(request_count):
            all_descriptors.extend(
                self._get_process_bundles(request_index, bundles_per_request))
            # Each request carries every descriptor accumulated so far,
            # not just the ones created for this index.
            register = beam_fn_api_pb2.RegisterRequest(
                process_bundle_descriptor=all_descriptors)
            instruction_requests.append(
                beam_fn_api_pb2.InstructionRequest(
                    instruction_id=str(request_index), register=register))

        controller = BeamFnControlServicer(instruction_requests)
        control_server = grpc.server(UnboundedThreadPoolExecutor())
        beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
            controller, control_server)
        port = control_server.add_insecure_port("[::]:0")
        control_server.start()

        harness = sdk_worker.SdkHarness(
            "localhost:%s" % port, state_cache_size=100)
        harness.run()

        registered = harness._bundle_processor_cache.fns
        expected = {
            descriptor.id: descriptor for descriptor in all_descriptors
        }
        self.assertEqual(registered, expected)
def test_fn_registration(self):
    """Register four bundle descriptors and check the worker's fn table."""
    descriptors = []
    for ix in range(4):
        transform_name = str(ix)
        descriptors.append(
            beam_fn_api_pb2.ProcessBundleDescriptor(
                id=str(100 + ix),
                transforms={
                    transform_name: beam_runner_api_pb2.PTransform(
                        unique_name=transform_name)
                }))

    # A single instruction registers all descriptors at once.
    register_instruction = beam_fn_api_pb2.InstructionRequest(
        register=beam_fn_api_pb2.RegisterRequest(
            process_bundle_descriptor=descriptors))
    controller = BeamFnControlServicer([register_instruction])

    control_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
        controller, control_server)
    port = control_server.add_insecure_port("[::]:0")
    control_server.start()

    harness = sdk_worker.SdkHarness(
        grpc.insecure_channel("localhost:%s" % port))
    harness.run()

    registered = harness.worker.fns
    expected = {descriptor.id: descriptor for descriptor in descriptors}
    self.assertEqual(registered, expected)
def test_fn_registration(self):
    """Register descriptors wrapping function specs; check the fn table.

    The expected table holds both the function specs and the bundle
    descriptors, each keyed by its ``id``.
    """
    function_specs = []
    for ix in range(4):
        function_specs.append(beam_fn_api_pb2.FunctionSpec(id=str(ix)))

    # One descriptor per spec, with ids offset by 100.
    descriptors = []
    for ix, spec in enumerate(function_specs):
        transform = beam_fn_api_pb2.PrimitiveTransform(function_spec=spec)
        descriptors.append(
            beam_fn_api_pb2.ProcessBundleDescriptor(
                id=str(100 + ix), primitive_transform=[transform]))

    register_instruction = beam_fn_api_pb2.InstructionRequest(
        register=beam_fn_api_pb2.RegisterRequest(
            process_bundle_descriptor=descriptors))
    controller = BeamFnControlServicer([register_instruction])

    control_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    beam_fn_api_pb2.add_BeamFnControlServicer_to_server(
        controller, control_server)
    port = control_server.add_insecure_port("[::]:0")
    control_server.start()

    harness = sdk_worker.SdkHarness(
        grpc.insecure_channel("localhost:%s" % port))
    harness.run()

    registered = harness.worker.fns
    # Specs first, then descriptors: same insertion order as iterating
    # the concatenated list.
    expected = {spec.id: spec for spec in function_specs}
    expected.update(
        (descriptor.id, descriptor) for descriptor in descriptors)
    self.assertEqual(registered, expected)
def NotifyRunnerAvailable(self, start_worker_request, context):
    """Start an SDK harness for the runner described by the request.

    When ``self._use_process`` is set the harness is launched with
    ``python -c`` in a subprocess that is registered to be killed at
    interpreter exit. Otherwise an ``SdkHarness`` is run on a daemon thread
    in the current process.

    Args:
      start_worker_request: supplies ``control_endpoint.url`` and
        ``worker_id``.
      context: not used by this method.

    Returns:
      An empty ``NotifyRunnerAvailableResponse`` on success. If anything
      raises, a response whose ``error`` is ``str()`` of the exception.
    """
    try:
        if self._use_process:
            command = [
                'python', '-c',
                'from apache_beam.runners.worker.sdk_worker '
                'import SdkHarness; '
                'SdkHarness("%s",worker_count=%d,worker_id="%s").run()' % (
                    start_worker_request.control_endpoint.url,
                    self._worker_threads,
                    start_worker_request.worker_id)
            ]
            # logging.warning, not the deprecated logging.warn alias, which
            # was removed in Python 3.13. Passing the argument separately
            # defers formatting to the logging framework.
            logging.warning("Starting worker with command %s", command)
            # NOTE(review): stdout is piped but not read in this method;
            # confirm something drains it or the child may eventually block.
            worker_process = subprocess.Popen(command, stdout=subprocess.PIPE)

            # Register to kill the subprocess on exit.
            atexit.register(worker_process.kill)
        else:
            worker = sdk_worker.SdkHarness(
                start_worker_request.control_endpoint.url,
                worker_count=self._worker_threads,
                worker_id=start_worker_request.worker_id)
            worker_thread = threading.Thread(
                name='run_worker_%s' % start_worker_request.worker_id,
                target=worker.run)
            # Daemon thread: it must not keep the interpreter alive on exit.
            worker_thread.daemon = True
            worker_thread.start()

        return beam_fn_api_pb2.NotifyRunnerAvailableResponse()
    except Exception as exn:
        # RPC boundary: report the failure in the response instead of
        # letting it propagate.
        return beam_fn_api_pb2.NotifyRunnerAvailableResponse(
            error=str(exn))
def start_worker(self):
    # type: () -> None
    """Create an ``SdkHarness`` and run it on a daemon thread.

    The harness is stored on ``self.worker`` and its thread on
    ``self.worker_thread``.
    """
    harness = sdk_worker.SdkHarness(
        self.control_address,
        state_cache_size=self._state_cache_size,
        data_buffer_time_limit_ms=self._data_buffer_time_limit_ms,
        worker_id=self.worker_id)
    self.worker = harness

    runner = threading.Thread(name='run_worker', target=harness.run)
    self.worker_thread = runner
    # Daemon so the thread does not keep the process alive on exit.
    runner.daemon = True
    runner.start()
def get_responses(self, instruction_requests):
    """Run the requests through a local harness and return the responses.

    A control server backed by ``BeamFnControlServicer`` is started on an
    ephemeral port and an ``SdkHarness`` is run against it on the calling
    thread.

    Args:
      instruction_requests: the requests handed to the test servicer.

    Returns:
      The servicer's ``responses`` attribute (per the original docstring,
      a mapping of id to InstructionResponse).
    """
    controller = BeamFnControlServicer(instruction_requests)

    control_server = grpc.server(
        thread_pool_executor.shared_unbounded_instance())
    beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
        controller, control_server)
    port = control_server.add_insecure_port("[::]:0")
    control_server.start()

    control_address = "localhost:%s" % port
    sdk_worker.SdkHarness(control_address, state_cache_size=100).run()
    return controller.responses
def NotifyRunnerAvailable(self, start_worker_request, context):
    """Run an ``SdkHarness`` for the request on a daemon thread.

    Args:
      start_worker_request: supplies ``control_endpoint.url`` and
        ``worker_id``.
      context: not used by this method.

    Returns:
      An empty ``NotifyRunnerAvailableResponse`` on success. If anything
      raises, a response whose ``error`` is ``str()`` of the exception.
    """
    try:
        harness = sdk_worker.SdkHarness(
            start_worker_request.control_endpoint.url,
            worker_count=self._worker_threads,
            worker_id=start_worker_request.worker_id)
        thread_name = 'run_worker_%s' % start_worker_request.worker_id
        runner = threading.Thread(name=thread_name, target=harness.run)
        runner.daemon = True
        runner.start()
    except Exception as exn:
        # Report the failure in the response instead of raising.
        return beam_fn_api_pb2.NotifyRunnerAvailableResponse(
            error=str(exn))
    else:
        # Building the success response is guarded like the original.
        try:
            return beam_fn_api_pb2.NotifyRunnerAvailableResponse()
        except Exception as exn:
            return beam_fn_api_pb2.NotifyRunnerAvailableResponse(
                error=str(exn))
def __init__(self, sdk_harness_factory=None):
    """Bring up local control and data gRPC servers and launch a harness.

    Args:
      sdk_harness_factory: optional callable taking the control address
        (``'localhost:<port>'``) and returning the harness to run. When
        falsy, ``sdk_worker.SdkHarness`` is used with ``worker_count=1``.
    """
    self.sdk_harness_factory = sdk_harness_factory

    # Create both servers and reserve their ports before registering
    # any servicer.
    self.control_server = control_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    self.control_port = control_server.add_insecure_port('[::]:0')

    self.data_server = data_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=10))
    self.data_port = data_server.add_insecure_port('[::]:0')

    self.control_handler = control_handler = streaming_rpc_handler(
        beam_fn_api_pb2_grpc.BeamFnControlServicer, 'Control')
    beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
        control_handler, control_server)

    self.data_plane_handler = data_handler = (
        data_plane.GrpcServerDataChannel())
    beam_fn_api_pb2_grpc.add_BeamFnDataServicer_to_server(
        data_handler, data_server)

    # TODO(robertwb): Is sharing the control channel fine?  Alternatively,
    # how should this be plumbed?
    self.state_handler = state_handler = FnApiRunner.GrpcStateServicer()
    beam_fn_api_pb2_grpc.add_BeamFnStateServicer_to_server(
        state_handler, control_server)

    logging.info('starting control server on port %s', self.control_port)
    logging.info('starting data server on port %s', self.data_port)
    data_server.start()
    control_server.start()

    # A caller-supplied factory takes precedence over the default harness.
    if self.sdk_harness_factory:
        self.worker = self.sdk_harness_factory(
            'localhost:%s' % self.control_port)
    else:
        self.worker = sdk_worker.SdkHarness(
            'localhost:%s' % self.control_port, worker_count=1)

    self.worker_thread = runner = threading.Thread(
        name='run_worker', target=self.worker.run)
    logging.info('starting worker')
    runner.start()
def start_worker(self):
    """Create an ``SdkHarness`` and run it on a new thread.

    The harness is stored on ``self.worker`` and its thread on
    ``self.worker_thread``; the thread is not marked as a daemon.
    """
    harness = sdk_worker.SdkHarness(
        self.control_address, worker_count=self._num_threads)
    self.worker = harness

    runner = threading.Thread(name='run_worker', target=harness.run)
    self.worker_thread = runner
    runner.start()