def __init__( self, control_address, worker_count, credentials=None, worker_id=None, # Caching is disabled by default state_cache_size=0, profiler_factory=None): self._alive = True self._worker_count = worker_count self._worker_index = 0 self._worker_id = worker_id self._state_cache = StateCache(state_cache_size) if credentials is None: logging.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: logging.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) logging.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials, self._worker_id) self._state_handler_factory = GrpcStateHandlerFactory( self._state_cache, credentials) self._profiler_factory = profiler_factory self._fns = {} # BundleProcessor cache across all workers. self._bundle_processor_cache = BundleProcessorCache( state_handler_factory=self._state_handler_factory, data_channel_factory=self._data_channel_factory, fns=self._fns) # workers for process/finalize bundle. self.workers = queue.Queue() # one worker for progress/split request. self.progress_worker = SdkWorker( self._bundle_processor_cache, profiler_factory=self._profiler_factory) # one thread is enough for getting the progress report. # Assumption: # Progress report generation should not do IO or wait on other resources. # Without wait, having multiple threads will not improve performance and # will only add complexity. self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1) # finalize and process share one thread pool. self._process_thread_pool = futures.ThreadPoolExecutor( max_workers=self._worker_count) self._responses = queue.Queue() self._process_bundle_queue = queue.Queue() self._unscheduled_process_bundle = {} logging.info('Initializing SDKHarness with %s workers.', self._worker_count)
def create_state_handler(self, api_service_descriptor): if not api_service_descriptor: return self._throwing_state_handler url = api_service_descriptor.url if url not in self._state_handler_cache: with self._lock: if url not in self._state_handler_cache: # Options to have no limits (-1) on the size of the messages # received or sent over the data plane. The actual buffer size is # controlled in a layer above. options = [('grpc.max_receive_message_length', -1), ('grpc.max_send_message_length', -1)] if self._credentials is None: logging.info('Creating insecure state channel for %s.', url) grpc_channel = GRPCChannelFactory.insecure_channel( url, options=options) else: logging.info('Creating secure state channel for %s.', url) grpc_channel = GRPCChannelFactory.secure_channel( url, self._credentials, options=options) logging.info('State channel established.') # Add workerId to the grpc channel grpc_channel = grpc.intercept_channel(grpc_channel, WorkerIdInterceptor()) self._state_handler_cache[url] = GrpcStateHandler( beam_fn_api_pb2_grpc.BeamFnStateStub(grpc_channel)) return self._state_handler_cache[url]
def create_data_channel(self, remote_grpc_port): url = remote_grpc_port.api_service_descriptor.url if url not in self._data_channel_cache: with self._lock: if url not in self._data_channel_cache: logging.info('Creating client data channel for %s', url) # Options to have no limits (-1) on the size of the messages # received or sent over the data plane. The actual buffer size # is controlled in a layer above. channel_options = [("grpc.max_receive_message_length", -1), ("grpc.max_send_message_length", -1)] grpc_channel = None if self._credentials is None: grpc_channel = GRPCChannelFactory.insecure_channel( url, options=channel_options) else: grpc_channel = GRPCChannelFactory.secure_channel( url, self._credentials, options=channel_options) # Add workerId to the grpc channel grpc_channel = grpc.intercept_channel( grpc_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_cache[url] = GrpcClientDataChannel( beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel)) return self._data_channel_cache[url]
def create_state_handler(self, api_service_descriptor): if not api_service_descriptor: return self._throwing_state_handler url = api_service_descriptor.url if url not in self._state_handler_cache: with self._lock: if url not in self._state_handler_cache: # Options to have no limits (-1) on the size of the messages # received or sent over the data plane. The actual buffer size is # controlled in a layer above. options = [('grpc.max_receive_message_length', -1), ('grpc.max_send_message_length', -1)] if self._credentials is None: _LOGGER.info('Creating insecure state channel for %s.', url) grpc_channel = GRPCChannelFactory.insecure_channel( url, options=options) else: _LOGGER.info('Creating secure state channel for %s.', url) grpc_channel = GRPCChannelFactory.secure_channel( url, self._credentials, options=options) _LOGGER.info('State channel established.') # Add workerId to the grpc channel grpc_channel = grpc.intercept_channel( grpc_channel, WorkerIdInterceptor()) self._state_handler_cache[url] = CachingStateHandler( self._state_cache, GrpcStateHandler( beam_fn_api_pb2_grpc.BeamFnStateStub( grpc_channel))) return self._state_handler_cache[url]
def create_data_channel_from_url(self, url): # type: (str) -> Optional[GrpcClientDataChannel] if not url: return None if url not in self._data_channel_cache: with self._lock: if url not in self._data_channel_cache: _LOGGER.info('Creating client data channel for %s', url) # Options to have no limits (-1) on the size of the messages # received or sent over the data plane. The actual buffer size # is controlled in a layer above. channel_options = [("grpc.max_receive_message_length", -1), ("grpc.max_send_message_length", -1)] grpc_channel = None if self._credentials is None: grpc_channel = GRPCChannelFactory.insecure_channel( url, options=channel_options) else: grpc_channel = GRPCChannelFactory.secure_channel( url, self._credentials, options=channel_options) # Add workerId to the grpc channel grpc_channel = grpc.intercept_channel( grpc_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_cache[url] = GrpcClientDataChannel( beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel), self._data_buffer_time_limit_ms) return self._data_channel_cache[url]
def create_data_channel(self, remote_grpc_port): url = remote_grpc_port.api_service_descriptor.url if url not in self._data_channel_cache: with self._lock: if url not in self._data_channel_cache: logging.info('Creating channel for %s', url) # Options to have no limits (-1) on the size of the messages # received or sent over the data plane. The actual buffer size # is controlled in a layer above. channel_options = [("grpc.max_receive_message_length", -1), ("grpc.max_send_message_length", -1)] grpc_channel = None if self._credentials is None: grpc_channel = GRPCChannelFactory.insecure_channel( url, options=channel_options) else: grpc_channel = GRPCChannelFactory.secure_channel( url, self._credentials, options=channel_options) # Add workerId to the grpc channel grpc_channel = grpc.intercept_channel(grpc_channel, WorkerIdInterceptor()) self._data_channel_cache[url] = GrpcClientDataChannel( beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel)) return self._data_channel_cache[url]
def __init__(self, control_address, # type: str credentials=None, worker_id=None, # type: Optional[str] # Caching is disabled by default state_cache_size=0, # time-based data buffering is disabled by default data_buffer_time_limit_ms=0, profiler_factory=None, # type: Optional[Callable[..., Profile]] status_address=None, # type: Optional[str, unicode] ): self._alive = True self._worker_index = 0 self._worker_id = worker_id self._state_cache = StateCache(state_cache_size) if credentials is None: _LOGGER.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: _LOGGER.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) _LOGGER.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials, self._worker_id, data_buffer_time_limit_ms) self._state_handler_factory = GrpcStateHandlerFactory( self._state_cache, credentials) self._profiler_factory = profiler_factory self._fns = {} # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor] # BundleProcessor cache across all workers. self._bundle_processor_cache = BundleProcessorCache( state_handler_factory=self._state_handler_factory, data_channel_factory=self._data_channel_factory, fns=self._fns) if status_address: try: self._status_handler = FnApiWorkerStatusHandler( status_address, self._bundle_processor_cache) except Exception: traceback_string = traceback.format_exc() _LOGGER.warning( 'Error creating worker status request handler, ' 'skipping status report. Trace back: %s' % traceback_string) else: self._status_handler = None # TODO(BEAM-8998) use common UnboundedThreadPoolExecutor to process bundle # progress once dataflow runner's excessive progress polling is removed. self._report_progress_executor = futures.ThreadPoolExecutor(max_workers=1) self._worker_thread_pool = UnboundedThreadPoolExecutor() self._responses = queue.Queue( ) # type: queue.Queue[beam_fn_api_pb2.InstructionResponse] _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
def __init__(self, status_address, bundle_process_cache=None, enable_heap_dump=False, log_lull_timeout_ns=DEFAULT_LOG_LULL_TIMEOUT_NS): """Initialize FnApiWorkerStatusHandler. Args: status_address: The URL Runner uses to host the WorkerStatus server. bundle_process_cache: The BundleProcessor cache dict from sdk worker. """ self._alive = True self._bundle_process_cache = bundle_process_cache ch = GRPCChannelFactory.insecure_channel(status_address) grpc.channel_ready_future(ch).result(timeout=60) self._status_channel = grpc.intercept_channel(ch, WorkerIdInterceptor()) self._status_stub = beam_fn_api_pb2_grpc.BeamFnWorkerStatusStub( self._status_channel) self._responses = queue.Queue() self.log_lull_timeout_ns = log_lull_timeout_ns self._last_full_thread_dump_secs = 0.0 self._last_lull_logged_secs = 0.0 self._server = threading.Thread(target=lambda: self._serve(), name='fn_api_status_handler') self._server.daemon = True self._enable_heap_dump = enable_heap_dump self._server.start() self._lull_logger = threading.Thread( target=lambda: self._log_lull_in_bundle_processor( self._bundle_process_cache), name='lull_operation_logger') self._lull_logger.daemon = True self._lull_logger.start()
def _start_local_runner_subprocess_job_service(cls): cls._maybe_kill_subprocess() # TODO(robertwb): Consider letting the subprocess pick one and # communicate it back... # pylint: disable=unbalanced-tuple-unpacking job_port, expansion_port = cls._pick_unused_ports(num_ports=2) _LOGGER.info('Starting server on port %d.', job_port) cls._subprocess = subprocess.Popen( cls._subprocess_command(job_port, expansion_port)) address = 'localhost:%d' % job_port job_service = beam_job_api_pb2_grpc.JobServiceStub( GRPCChannelFactory.insecure_channel(address)) _LOGGER.info('Waiting for server to be ready...') start = time.time() timeout = 30 while True: time.sleep(0.1) if cls._subprocess.poll() is not None: raise RuntimeError( 'Subprocess terminated unexpectedly with exit code %d.' % cls._subprocess.returncode) elif time.time() - start > timeout: raise RuntimeError( 'Pipeline timed out waiting for job service subprocess.') else: try: job_service.GetState( beam_job_api_pb2.GetJobStateRequest(job_id='[fake]')) break except grpc.RpcError as exn: if exn.code() != grpc.StatusCode.UNAVAILABLE: # We were able to contact the service for our fake state request. break _LOGGER.info('Server ready.') return address
def _start_local_runner_subprocess_job_service(cls): cls._maybe_kill_subprocess() # TODO(robertwb): Consider letting the subprocess pick one and # communicate it back... port = cls._pick_unused_port() logging.info('Starting server on port %d.', port) cls._subprocess = subprocess.Popen(cls._subprocess_command(port)) address = 'localhost:%d' % port job_service = beam_job_api_pb2_grpc.JobServiceStub( GRPCChannelFactory.insecure_channel(address)) logging.info('Waiting for server to be ready...') start = time.time() timeout = 30 while True: time.sleep(0.1) if cls._subprocess.poll() is not None: raise RuntimeError( 'Subprocess terminated unexpectedly with exit code %d.' % cls._subprocess.returncode) elif time.time() - start > timeout: raise RuntimeError( 'Pipeline timed out waiting for job service subprocess.') else: try: job_service.GetState( beam_job_api_pb2.GetJobStateRequest(job_id='[fake]')) break except grpc.RpcError as exn: if exn.code() != grpc.StatusCode.UNAVAILABLE: # We were able to contact the service for our fake state request. break logging.info('Server ready.') return address
def __init__( self, control_address, credentials=None, worker_id=None, # Caching is disabled by default state_cache_size=0, profiler_factory=None): self._alive = True self._worker_index = 0 self._worker_id = worker_id self._state_cache = StateCache(state_cache_size) if credentials is None: _LOGGER.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: _LOGGER.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) _LOGGER.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials, self._worker_id) self._state_handler_factory = GrpcStateHandlerFactory( self._state_cache, credentials) self._profiler_factory = profiler_factory self._fns = {} # BundleProcessor cache across all workers. self._bundle_processor_cache = BundleProcessorCache( state_handler_factory=self._state_handler_factory, data_channel_factory=self._data_channel_factory, fns=self._fns) # TODO(BEAM-8998) use common UnboundedThreadPoolExecutor to process bundle # progress once dataflow runner's excessive progress polling is removed. self._report_progress_executor = futures.ThreadPoolExecutor( max_workers=1) self._worker_thread_pool = UnboundedThreadPoolExecutor() self._responses = queue.Queue() _LOGGER.info( 'Initializing SDKHarness with unbounded number of workers.')
def __init__(self, control_address, worker_count, credentials=None, worker_id=None, profiler_factory=None): self._alive = True self._worker_count = worker_count self._worker_index = 0 self._worker_id = worker_id if credentials is None: logging.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: logging.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) logging.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials) self._state_handler_factory = GrpcStateHandlerFactory(credentials) self._profiler_factory = profiler_factory self.workers = queue.Queue() # one thread is enough for getting the progress report. # Assumption: # Progress report generation should not do IO or wait on other resources. # Without wait, having multiple threads will not improve performance and # will only add complexity. self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1) self._process_thread_pool = futures.ThreadPoolExecutor( max_workers=self._worker_count) self._instruction_id_vs_worker = {} self._fns = {} self._responses = queue.Queue() self._process_bundle_queue = queue.Queue() self._unscheduled_process_bundle = {} logging.info('Initializing SDKHarness with %s workers.', self._worker_count)
def start_worker(self): stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub( GRPCChannelFactory.insecure_channel( self._external_payload.endpoint.url)) response = stub.NotifyRunnerAvailable( beam_fn_api_pb2.NotifyRunnerAvailableRequest( control_endpoint=endpoints_pb2.ApiServiceDescriptor( url=self.control_address), params=self._external_payload.params)) if response.error: raise RuntimeError("Error starting worker: %s" % response.error)
def __init__(self, control_address, # type: str credentials=None, worker_id=None, # type: Optional[str] # Caching is disabled by default state_cache_size=0, profiler_factory=None # type: Optional[Callable[..., Profile]] ): self._alive = True self._worker_index = 0 self._worker_id = worker_id self._state_cache = StateCache(state_cache_size) if credentials is None: _LOGGER.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: _LOGGER.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) _LOGGER.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials, self._worker_id) self._state_handler_factory = GrpcStateHandlerFactory(self._state_cache, credentials) self._profiler_factory = profiler_factory self._fns = {} # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor] # BundleProcessor cache across all workers. self._bundle_processor_cache = BundleProcessorCache( state_handler_factory=self._state_handler_factory, data_channel_factory=self._data_channel_factory, fns=self._fns) self._worker_thread_pool = UnboundedThreadPoolExecutor() self._responses = queue.Queue() # type: queue.Queue[beam_fn_api_pb2.InstructionResponse] _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
def start_worker(self): stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub( GRPCChannelFactory.insecure_channel( self._external_payload.endpoint.url)) response = stub.NotifyRunnerAvailable( beam_fn_api_pb2.NotifyRunnerAvailableRequest( worker_id='worker_%s' % uuid.uuid4(), control_endpoint=endpoints_pb2.ApiServiceDescriptor( url=self.control_address), logging_endpoint=self.logging_api_service_descriptor(), params=self._external_payload.params)) if response.error: raise RuntimeError("Error starting worker: %s" % response.error)
def __init__( self, control_address, worker_count, credentials=None, worker_id=None, profiler_factory=None): self._alive = True self._worker_count = worker_count self._worker_index = 0 self._worker_id = worker_id if credentials is None: logging.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address) else: logging.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials) grpc.channel_ready_future(self._control_channel).result(timeout=60) logging.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials) self._state_handler_factory = GrpcStateHandlerFactory(credentials) self._profiler_factory = profiler_factory self.workers = queue.Queue() # one thread is enough for getting the progress report. # Assumption: # Progress report generation should not do IO or wait on other resources. # Without wait, having multiple threads will not improve performance and # will only add complexity. self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1) self._process_thread_pool = futures.ThreadPoolExecutor( max_workers=self._worker_count) self._instruction_id_vs_worker = {} self._fns = {} self._responses = queue.Queue() self._process_bundle_queue = queue.Queue() self._unscheduled_process_bundle = {} logging.info('Initializing SDKHarness with %s workers.', self._worker_count)
def __init__(self, log_service_descriptor): super(FnApiLogRecordHandler, self).__init__() self._alive = True self._dropped_logs = 0 self._log_entry_queue = queue.Queue(maxsize=self._QUEUE_SIZE) ch = GRPCChannelFactory.insecure_channel(log_service_descriptor.url) # Make sure the channel is ready to avoid [BEAM-4649] grpc.channel_ready_future(ch).result(timeout=60) self._log_channel = grpc.intercept_channel(ch, WorkerIdInterceptor()) self._reader = threading.Thread( target=lambda: self._read_log_control_messages(), name='read_log_control_messages') self._reader.daemon = True self._reader.start()
def start_worker(self): # type: () -> None stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub( GRPCChannelFactory.insecure_channel( self._external_payload.endpoint.url)) control_descriptor = endpoints_pb2.ApiServiceDescriptor( url=self.control_address) response = stub.StartWorker( beam_fn_api_pb2.StartWorkerRequest( worker_id=self.worker_id, control_endpoint=control_descriptor, artifact_endpoint=control_descriptor, provision_endpoint=control_descriptor, logging_endpoint=self.logging_api_service_descriptor(), params=self._external_payload.params)) if response.error: raise RuntimeError("Error starting worker: %s" % response.error)
def __init__(self, log_service_descriptor): # type: (endpoints_pb2.ApiServiceDescriptor) -> None super().__init__() self._alive = True self._dropped_logs = 0 self._log_entry_queue = queue.Queue( maxsize=self._QUEUE_SIZE ) # type: queue.Queue[Union[beam_fn_api_pb2.LogEntry, Sentinel]] ch = GRPCChannelFactory.insecure_channel(log_service_descriptor.url) # Make sure the channel is ready to avoid [BEAM-4649] grpc.channel_ready_future(ch).result(timeout=60) self._log_channel = grpc.intercept_channel(ch, WorkerIdInterceptor()) self._reader = threading.Thread( target=lambda: self._read_log_control_messages(), name='read_log_control_messages') self._reader.daemon = True self._reader.start()
def __init__(self, status_address, bundle_process_cache=None): """Initialize FnApiWorkerStatusHandler. Args: status_address: The URL Runner uses to host the WorkerStatus server. bundle_process_cache: The BundleProcessor cache dict from sdk worker. """ self._alive = True self._bundle_process_cache = bundle_process_cache ch = GRPCChannelFactory.insecure_channel(status_address) grpc.channel_ready_future(ch).result(timeout=60) self._status_channel = grpc.intercept_channel(ch, WorkerIdInterceptor()) self._status_stub = beam_fn_api_pb2_grpc.BeamFnWorkerStatusStub( self._status_channel) self._responses = queue.Queue() self._server = threading.Thread( target=lambda: self._serve(), name='fn_api_status_handler') self._server.daemon = True self._server.start()
def run_pipeline(self, pipeline, options): portable_options = options.view_as(PortableOptions) job_endpoint = portable_options.job_endpoint # TODO: https://issues.apache.org/jira/browse/BEAM-5525 # portable runner specific default if options.view_as(SetupOptions).sdk_location == 'default': options.view_as(SetupOptions).sdk_location = 'container' if not job_endpoint: # TODO Provide a way to specify a container Docker URL # https://issues.apache.org/jira/browse/BEAM-6328 docker = DockerizedJobServer() job_endpoint = docker.start() # This is needed as we start a worker server if one is requested # but none is provided. if portable_options.environment_type == 'LOOPBACK': portable_options.environment_config, server = ( BeamFnExternalWorkerPoolServicer.start( sdk_worker_main._get_worker_count(options))) cleanup_callbacks = [functools.partial(server.stop, 1)] else: cleanup_callbacks = [] proto_pipeline = pipeline.to_runner_api( default_environment=PortableRunner._create_environment( portable_options)) # Some runners won't detect the GroupByKey transform unless it has no # subtransforms. Remove all sub-transforms until BEAM-4605 is resolved. for _, transform_proto in list( proto_pipeline.components.transforms.items()): if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn: for sub_transform in transform_proto.subtransforms: del proto_pipeline.components.transforms[sub_transform] del transform_proto.subtransforms[:] # Preemptively apply combiner lifting, until all runners support it. # This optimization is idempotent. if not options.view_as(StandardOptions).streaming: stages = list( fn_api_runner_transforms.leaf_transform_stages( proto_pipeline.root_transform_ids, proto_pipeline.components)) stages = fn_api_runner_transforms.lift_combiners( stages, fn_api_runner_transforms.TransformContext( proto_pipeline.components)) proto_pipeline = fn_api_runner_transforms.with_stages( proto_pipeline, stages) # TODO: Define URNs for options. # convert int values: https://issues.apache.org/jira/browse/BEAM-5509 p_options = { 'beam:option:' + k + ':v1': (str(v) if type(v) == int else v) for k, v in options.get_all_options().items() if v is not None } channel = GRPCChannelFactory.insecure_channel(job_endpoint) grpc.channel_ready_future(channel).result() job_service = beam_job_api_pb2_grpc.JobServiceStub(channel) # Sends the PrepareRequest but retries in case the channel is not ready def send_prepare_request(max_retries=5): num_retries = 0 while True: try: # This reports channel is READY but connections may fail # Seems to be only an issue on Mac with port forwardings grpc.channel_ready_future(channel).result() return job_service.Prepare( beam_job_api_pb2.PrepareJobRequest( job_name='job', pipeline=proto_pipeline, pipeline_options=job_utils.dict_to_struct( p_options))) except grpc._channel._Rendezvous as e: num_retries += 1 if num_retries > max_retries: raise e prepare_response = send_prepare_request() if prepare_response.artifact_staging_endpoint.url: stager = portable_stager.PortableStager( GRPCChannelFactory.insecure_channel( prepare_response.artifact_staging_endpoint.url), prepare_response.staging_session_token) retrieval_token, _ = stager.stage_job_resources( options, staging_location='') else: retrieval_token = None run_response = job_service.Run( beam_job_api_pb2.RunJobRequest( preparation_id=prepare_response.preparation_id, retrieval_token=retrieval_token)) return PipelineResult(job_service, run_response.job_id, cleanup_callbacks)
def __init__( self, control_address, # type: str credentials=None, # type: Optional[grpc.ChannelCredentials] worker_id=None, # type: Optional[str] # Caching is disabled by default state_cache_size=0, # type: int # time-based data buffering is disabled by default data_buffer_time_limit_ms=0, # type: int profiler_factory=None, # type: Optional[Callable[..., Profile]] status_address=None, # type: Optional[str] # Heap dump through status api is disabled by default enable_heap_dump=False, # type: bool ): # type: (...) -> None self._alive = True self._worker_index = 0 self._worker_id = worker_id self._state_cache = StateCache(state_cache_size) options = [('grpc.max_receive_message_length', -1), ('grpc.max_send_message_length', -1)] if credentials is None: _LOGGER.info('Creating insecure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.insecure_channel( control_address, options=options) else: _LOGGER.info('Creating secure control channel for %s.', control_address) self._control_channel = GRPCChannelFactory.secure_channel( control_address, credentials, options=options) grpc.channel_ready_future(self._control_channel).result(timeout=60) _LOGGER.info('Control channel established.') self._control_channel = grpc.intercept_channel( self._control_channel, WorkerIdInterceptor(self._worker_id)) self._data_channel_factory = data_plane.GrpcClientDataChannelFactory( credentials, self._worker_id, data_buffer_time_limit_ms) self._state_handler_factory = GrpcStateHandlerFactory( self._state_cache, credentials) self._profiler_factory = profiler_factory def default_factory(id): # type: (str) -> beam_fn_api_pb2.ProcessBundleDescriptor return self._control_stub.GetProcessBundleDescriptor( beam_fn_api_pb2.GetProcessBundleDescriptorRequest( process_bundle_descriptor_id=id)) self._fns = KeyedDefaultDict(default_factory) # BundleProcessor cache across all workers. self._bundle_processor_cache = BundleProcessorCache( state_handler_factory=self._state_handler_factory, data_channel_factory=self._data_channel_factory, fns=self._fns) if status_address: try: self._status_handler = FnApiWorkerStatusHandler( status_address, self._bundle_processor_cache, enable_heap_dump) # type: Optional[FnApiWorkerStatusHandler] except Exception: traceback_string = traceback.format_exc() _LOGGER.warning( 'Error creating worker status request handler, ' 'skipping status report. Trace back: %s' % traceback_string) else: self._status_handler = None # TODO(BEAM-8998) use common # thread_pool_executor.shared_unbounded_instance() to process bundle # progress once dataflow runner's excessive progress polling is removed. self._report_progress_executor = futures.ThreadPoolExecutor(max_workers=1) self._worker_thread_pool = thread_pool_executor.shared_unbounded_instance() self._responses = queue.Queue( ) # type: queue.Queue[Union[beam_fn_api_pb2.InstructionResponse, Sentinel]] _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')