def compss_persistent_executor(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Persistent executor main function.

    Retrieves the initial configuration and spawns the worker processes.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    COMM.gather(str(os.getpid()), root=0)

    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)
    # Catch SIGUSR2 to solve strange behaviour with mpi4py
    signal.signal(signal.SIGUSR2, user_signal_handler)

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != "null")

    logger, logger_cfg, storage_loggers, _ = load_loggers(
        config.debug, persistent_storage)

    cache_profiler = False
    if config.cache_profiler.lower() == 'true':
        cache_profiler = True

    if persistent_storage:
        # Initialize storage
        with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    process_name = "".join(("Rank-", str(RANK)))
    conf = ExecutorConf(config.debug,
                        get_temporary_directory(),
                        TRACING,
                        config.storage_conf,
                        logger,
                        logger_cfg,
                        persistent_storage,
                        storage_loggers,
                        config.stream_backend,
                        config.stream_master_name,
                        config.stream_master_port,
                        CACHE_IDS,
                        CACHE_QUEUE,
                        cache_profiler)
    executor(None, process_name, config.pipes[RANK - 1], conf)

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker  # noqa
            finishStorageAtWorker()
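
# A minimal sketch (not part of the original module) of how the module-level
# COMM and RANK globals used above are presumably initialised with mpi4py.
# The COMM.gather() call in compss_persistent_executor mirrors this pattern:
# every rank contributes its PID and rank 0 collects the full list.
def _sketch_mpi_globals():  # pragma: no cover
    """Illustrative only: assumed mpi4py initialisation."""
    from mpi4py import MPI
    comm = MPI.COMM_WORLD  # communicator shared by all spawned ranks
    rank = comm.Get_rank()  # this process' rank id
    pids = comm.gather(str(os.getpid()), root=0)  # list on rank 0, None elsewhere
    return comm, rank, pids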
def stop_storage(logger):
    # type: (typing.Any) -> None
    """ Stops the persistent storage.

    This function emits the event in the worker.

    :param logger: Logger where to log the messages.
    :return: None
    """
    with event_worker(STOP_STORAGE_EVENT):
        __stop_storage__(logger)
def init_storage(storage_conf, logger):  # noqa
    # type: (str, typing.Any) -> bool
    """ Call to init storage.

    This function emits the event in the worker.

    :param storage_conf: Storage configuration file.
    :param logger: Logger where to log the messages.
    :return: True if initialized. False on the contrary.
    """
    with event_worker(INIT_STORAGE_EVENT):
        return __init_storage__(storage_conf, logger)
def process_quit(logger, process_name):  # noqa
    # type: (typing.Any, str) -> bool
    """ Process quit message.

    Response: False.

    :param logger: Logger.
    :param process_name: Process name.
    :return: Always False.
    """
    with event_worker(PROCESS_QUIT_EVENT):
        if __debug__:
            logger.debug(HEADER + "[%s] Received quit." % str(process_name))
        return False
def process_ping(pipe, logger, process_name):  # noqa
    # type: (Pipe, typing.Any, str) -> bool
    """ Process ping message.

    Response: Pong.

    :param pipe: Where to write the ping response.
    :param logger: Logger.
    :param process_name: Process name.
    :return: True if success. False otherwise.
    """
    with event_worker(PROCESS_PING_EVENT):
        if __debug__:
            logger.debug(HEADER + "[%s] Received ping." % str(process_name))
        try:
            pipe.write(PONG_TAG)
        except Exception:  # noqa
            return False
        return True
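
# A minimal dispatch sketch (hypothetical helper, not in the original source)
# showing how the quit/ping handlers above are typically selected on the
# first token of a pipe command. It mirrors the convention that the returned
# boolean is the executor's "keep alive" flag.
def _sketch_dispatch_tag(tag, pipe, logger, process_name):  # pragma: no cover
    """Illustrative only: assumes QUIT_TAG and PING_TAG module constants."""
    if tag == QUIT_TAG:
        # process_quit always returns False, which stops the executor loop.
        return process_quit(logger, process_name)
    if tag == PING_TAG:
        # process_ping answers PONG_TAG through the pipe.
        return process_ping(pipe, logger, process_name)
    return True  # any other tag keeps the executor alive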
def main():
    # type: () -> None
    """ GAT worker main code.

    Executes the task provided by parameters.

    :return: None
    """
    # Emit sync event if tracing is enabled
    tracing = sys.argv[1] == 'true'
    task_id = int(sys.argv[2])
    log_level = sys.argv[3]
    storage_conf = sys.argv[4]
    stream_backend = sys.argv[5]
    stream_master_name = sys.argv[6]
    stream_master_port = sys.argv[7]
    # Next: method_type = sys.argv[8]
    params = sys.argv[9:]
    # Next parameters:
    # class_name = sys.argv[10]
    # method_name = sys.argv[11]
    # num_slaves = sys.argv[12]
    # i = 13 + num_slaves
    # slaves = sys.argv[12..i]
    # numCus = sys.argv[i+1]
    # has_target = sys.argv[i+2] == 'true'
    # num_params = int(sys.argv[i+3])
    # params = sys.argv[i+4..]

    if log_level == "true" or log_level == "debug":
        print("Tracing = " + str(tracing))
        print("Task id = " + str(task_id))
        print("Log level = " + str(log_level))
        print("Storage conf = " + str(storage_conf))

    persistent_storage = False
    if storage_conf != "null":
        persistent_storage = True

    streaming = False
    if stream_backend not in [None, "null", "NONE"]:
        streaming = True

    with trace_multiprocessing_worker() if tracing else dummy_context():
        if streaming:
            # Start streaming
            DistroStreamClientHandler.init_and_start(
                master_ip=stream_master_name,
                master_port=stream_master_port)

        # Load log level configuration file
        worker_path = os.path.dirname(os.path.realpath(__file__))
        if log_level == "true" or log_level == "debug":
            # Debug
            log_json = "".join((worker_path,
                                "/../../../log/logging_gat_worker_debug.json"))
        elif log_level == "info" or log_level == "off":
            # Info or no debug
            log_json = "".join((worker_path,
                                "/../../../log/logging_gat_worker_off.json"))
        else:
            # Default
            log_json = "".join((worker_path,
                                "/../../../log/logging_gat_worker.json"))
        init_logging_worker(log_json, tracing)

        if persistent_storage:
            # Initialize storage
            with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
                from storage.api import initWorker as initStorageAtWorker  # noqa
                initStorageAtWorker(config_file_path=storage_conf)

        # Init worker
        exit_code = compss_worker(tracing, str(task_id), storage_conf,
                                  params, log_json)

        if streaming:
            # Finish streaming
            DistroStreamClientHandler.set_stop()

        if persistent_storage:
            # Finish storage
            with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
                from storage.api import finishWorker as finishStorageAtWorker  # noqa
                finishStorageAtWorker()

    if exit_code == 1:
        exit(1)
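
# A minimal sketch (values are made up) of the fixed head of the command line
# that main() expects. Everything from sys.argv[9] onwards is forwarded
# untouched to compss_worker as the task parameters.
def _sketch_gat_worker_argv():  # pragma: no cover
    """Illustrative only: positional argument layout parsed by main()."""
    return [
        "gat_worker.py",  # sys.argv[0]: script name (hypothetical)
        "true",           # sys.argv[1]: tracing enabled
        "42",             # sys.argv[2]: task id
        "debug",          # sys.argv[3]: log level
        "null",           # sys.argv[4]: storage configuration ("null" = none)
        "null",           # sys.argv[5]: streaming backend ("null" = none)
        "localhost",      # sys.argv[6]: streaming master name
        "49049",          # sys.argv[7]: streaming master port
        "METHOD",         # sys.argv[8]: method type
        # sys.argv[9:] : task parameters (passed through as params)
    ]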
def compss_persistent_worker(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Persistent worker main function.

    Retrieves the initial configuration and spawns the worker processes.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    global CACHE
    global CACHE_PROCESS

    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)

    # Set the binding in worker mode
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != 'null')

    logger, logger_cfg, storage_loggers, log_dir = load_loggers(
        config.debug, persistent_storage)

    if __debug__:
        logger.debug(HEADER + "piper_worker.py wake up")
        config.print_on_logger(logger)

    if persistent_storage:
        # Initialize storage
        logger.debug(HEADER + "Starting persistent storage")
        with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    # Create new processes
    queues = []

    cache_profiler = False
    if config.cache_profiler.lower() == 'true':
        cache_profiler = True

    # Setup cache
    if is_cache_enabled(str(config.cache)):
        # Deploy the necessary processes
        CACHE = True
        cache_params = start_cache(logger, str(config.cache),
                                   cache_profiler, log_dir)
    else:
        # No cache
        CACHE = False
        cache_params = (None, None, None, None)  # type: ignore
    smm, CACHE_PROCESS, cache_queue, cache_ids = cache_params

    # Create new executor processes
    conf = ExecutorConf(config.debug,
                        get_temporary_directory(),
                        TRACING,
                        config.storage_conf,
                        logger,
                        logger_cfg,
                        persistent_storage,
                        storage_loggers,
                        config.stream_backend,
                        config.stream_master_name,
                        config.stream_master_port,
                        cache_ids,
                        cache_queue,
                        cache_profiler)

    for i in range(0, config.tasks_x_node):
        if __debug__:
            logger.debug(HEADER + "Launching process " + str(i))
        process_name = "".join(("Process-", str(i)))
        pid, queue = create_executor_process(process_name, conf,
                                             config.pipes[i])
        queues.append(queue)

    # Read command from control pipe
    alive = True
    process_counter = config.tasks_x_node
    control_pipe = config.control_pipe  # type: typing.Any
    while alive:
        command = control_pipe.read_command(retry_period=1)
        if command != "":
            line = command.split()
            if line[0] == ADD_EXECUTOR_TAG:
                process_name = "".join(("Process-", str(process_counter)))
                process_counter = process_counter + 1
                in_pipe = line[1]
                out_pipe = line[2]
                pipe = Pipe(in_pipe, out_pipe)
                pid, queue = create_executor_process(process_name, conf, pipe)
                queues.append(queue)
                control_pipe.write(" ".join((ADDED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))
            elif line[0] == QUERY_EXECUTOR_ID_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.get(in_pipe)  # type: typing.Any
                pid = proc.pid
                control_pipe.write(" ".join((REPLY_EXECUTOR_ID_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))
            elif line[0] == CANCEL_TASK_TAG:
                in_pipe = line[1]
                cancel_proc = PROCESSES.get(in_pipe)  # type: typing.Any
                cancel_pid = cancel_proc.pid
                if __debug__:
                    logger.debug(HEADER + "Signaling process with PID " +
                                 str(cancel_pid) + " to cancel a task")
                os.kill(cancel_pid,
                        signal.SIGUSR2)  # NOSONAR cancellation produced by COMPSs
            elif line[0] == REMOVE_EXECUTOR_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.pop(in_pipe, None)
                if proc:
                    if proc.is_alive():
                        logger.warning(HEADER + "Forcing terminate on: " +
                                       proc.name)
                        proc.terminate()
                    proc.join()
                control_pipe.write(" ".join((REMOVED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe)))
            elif line[0] == PING_TAG:
                control_pipe.write(PONG_TAG)
            elif line[0] == QUIT_TAG:
                alive = False

    # Wait for all child processes
    for proc in PROCESSES.values():
        proc.join()

    # Check if there is any exception message from the child processes
    for i in range(0, config.tasks_x_node):
        if not queues[i].empty():
            logger.error(HEADER + "Exception in threads queue: " +
                         str(queues[i].get()))

    for queue in queues:
        queue.close()
        queue.join_thread()

    if CACHE:
        stop_cache(smm, cache_queue, cache_profiler, CACHE_PROCESS)  # noqa

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker  # noqa
            finishStorageAtWorker()

    if __debug__:
        logger.debug(HEADER + "Finished")

    control_pipe.write(QUIT_TAG)
    control_pipe.close()
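
# A minimal sketch (pipe names and PID are made up) of the request/reply
# grammar handled by the control-pipe loop above. Each command is one
# space-separated line whose first token selects the action.
def _sketch_control_pipe_protocol():  # pragma: no cover
    """Illustrative only: commands understood by compss_persistent_worker."""
    return [
        # ADD_EXECUTOR <in_pipe> <out_pipe>
        #   -> ADDED_EXECUTOR <out_pipe> <in_pipe> <pid>
        (" ".join((ADD_EXECUTOR_TAG, "in.pipe", "out.pipe")),
         " ".join((ADDED_EXECUTOR_TAG, "out.pipe", "in.pipe", "12345"))),
        # QUERY_EXECUTOR_ID <in_pipe> <out_pipe>
        #   -> REPLY_EXECUTOR_ID <out_pipe> <in_pipe> <pid>
        (" ".join((QUERY_EXECUTOR_ID_TAG, "in.pipe", "out.pipe")),
         " ".join((REPLY_EXECUTOR_ID_TAG, "out.pipe", "in.pipe", "12345"))),
        # CANCEL_TASK <in_pipe> -> SIGUSR2 to the executor process, no reply
        (" ".join((CANCEL_TASK_TAG, "in.pipe")), None),
        # REMOVE_EXECUTOR <in_pipe> <out_pipe>
        #   -> REMOVED_EXECUTOR <out_pipe> <in_pipe>
        (" ".join((REMOVE_EXECUTOR_TAG, "in.pipe", "out.pipe")),
         " ".join((REMOVED_EXECUTOR_TAG, "out.pipe", "in.pipe"))),
        # PING -> PONG; QUIT stops the loop and QUIT is echoed on shutdown
        (PING_TAG, PONG_TAG),
        (QUIT_TAG, QUIT_TAG),
    ]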
def process_task(
        current_line,      # type: str
        process_name,      # type: str
        logger,            # type: typing.Any
        log_json,          # type: str
        logger_handlers,   # type: typing.Any
        logger_level,      # type: int
        logger_formatter   # type: typing.Any
):
    # type: (...) -> typing.Tuple[int, str]
    """ Process command received from the current_line.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param logger: Logger.
    :param log_json: Logger configuration file.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :return: exit_value and message.
    """
    with event_worker(PROCESS_TASK_EVENT):
        # Process properties
        stdout = sys.stdout
        stderr = sys.stderr
        job_id = None
        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Received message: %s" %
                         (str(process_name), str(current_line)))
        splitted_current_line = current_line.split()
        if splitted_current_line[0] == EXECUTE_TASK_TAG:
            num_collection_params = int(splitted_current_line[-1])
            collections_layouts = dict()
            if num_collection_params > 0:
                raw_layouts = splitted_current_line[
                    ((num_collection_params * -4) - 1):-1]
                for i in range(num_collection_params):
                    param = raw_layouts[i * 4]
                    layout = [int(raw_layouts[(i * 4) + 1]),
                              int(raw_layouts[(i * 4) + 2]),
                              int(raw_layouts[(i * 4) + 3])]
                    collections_layouts[param] = layout
            # Remove the last elements: cpu and gpu bindings and
            # collection params
            current_line_filtered = splitted_current_line[0:-3]

            # task jobId command
            job_id = current_line_filtered[1]
            job_out = current_line_filtered[2]
            job_err = current_line_filtered[3]
            # current_line_filtered[4]  = <boolean> = tracing
            # current_line_filtered[5]  = <integer> = task id
            # current_line_filtered[6]  = <boolean> = debug
            # current_line_filtered[7]  = <string>  = storage conf.
            # current_line_filtered[8]  = <string>  = operation type
            #                                         (e.g. METHOD)
            # current_line_filtered[9]  = <string>  = module
            # current_line_filtered[10] = <string>  = method
            # current_line_filtered[11] = <string>  = time out
            # current_line_filtered[12] = <integer> = Number of slaves
            #                                         (worker nodes) == #nodes
            # <<list of slave nodes>>
            # current_line_filtered[12 + #nodes] = <integer> = computing units
            # current_line_filtered[13 + #nodes] = <boolean> = has target
            # current_line_filtered[14 + #nodes] = <string>  = has return
            #                                                  (always "null")
            # current_line_filtered[15 + #nodes] = <integer> = Number of
            #                                                  parameters
            # <<list of parameters>>
            #  !---> type, stream, prefix, value

            if __debug__:
                logger.debug(
                    "[PYTHON EXECUTOR] [%s] Received task with id: %s" %
                    (str(process_name), str(job_id)))
                logger.debug("[PYTHON EXECUTOR] [%s] - TASK CMD: %s" %
                             (str(process_name), str(current_line_filtered)))

            # Swap logger from stream handler to file handler.
            # All task output will be redirected to job.out/err
            for log_handler in logger_handlers:
                logger.removeHandler(log_handler)
            out_file_handler = logging.FileHandler(job_out)
            out_file_handler.setLevel(logger_level)
            out_file_handler.setFormatter(logger_formatter)
            err_file_handler = logging.FileHandler(job_err)
            err_file_handler.setLevel("ERROR")
            err_file_handler.setFormatter(logger_formatter)
            logger.addHandler(out_file_handler)
            logger.addHandler(err_file_handler)

            if __debug__:
                logger.debug("Received task in process: %s" %
                             str(process_name))
                logger.debug(" - TASK CMD: %s" %
                             str(current_line_filtered))

            try:
                # Setup out/err wrappers
                out = open(job_out, "a")
                err = open(job_err, "a")
                sys.stdout = out
                sys.stderr = err

                # Setup process environment
                cn = int(current_line_filtered[12])
                cn_names = ",".join(current_line_filtered[13:13 + cn])
                cu = int(current_line_filtered[13 + cn])
                os.environ["COMPSS_NUM_NODES"] = str(cn)
                os.environ["COMPSS_HOSTNAMES"] = cn_names
                os.environ["COMPSS_NUM_THREADS"] = str(cu)
                os.environ["OMP_NUM_THREADS"] = str(cu)
                if __debug__:
                    logger.debug("Process environment:")
                    logger.debug("\t - Number of nodes: %s" % (str(cn)))
                    logger.debug("\t - Hostnames: %s" % str(cn_names))
                    logger.debug("\t - Number of threads: %s" % (str(cu)))

                # Execute task
                storage_conf = "null"
                tracing = False
                python_mpi = True
                result = execute_task(process_name, storage_conf,
                                      current_line_filtered[9:], tracing,
                                      logger, log_json, (job_out, job_err),
                                      python_mpi, collections_layouts,
                                      None, None)
                exit_value, new_types, new_values, time_out, except_msg = result

                # Restore out/err wrappers
                sys.stdout = stdout
                sys.stderr = stderr
                sys.stdout.flush()
                sys.stderr.flush()
                out.close()
                err.close()

                # To reduce if necessary:
                # global_exit_value = MPI.COMM_WORLD.reduce(exit_value,
                #                                           op=MPI.SUM,
                #                                           root=0)
                # message = ""
                # if MPI.COMM_WORLD.rank == 0 and global_exit_value == 0:
                if exit_value == 0:
                    # Task has finished without exceptions
                    # endTask jobId exitValue message
                    params = build_return_params_message(new_types,
                                                         new_values)
                    message = " ".join((END_TASK_TAG,
                                        str(job_id),
                                        str(exit_value),
                                        str(params) + "\n"))
                elif exit_value == 2:
                    # Task has finished with a COMPSs Exception
                    # compssExceptionTask jobId exitValue message
                    except_msg = except_msg.replace(" ", "_")
                    message = " ".join((COMPSS_EXCEPTION_TAG,
                                        str(job_id),
                                        str(except_msg) + "\n"))
                    if __debug__:
                        logger.debug(
                            "%s - COMPSS EXCEPTION TASK MESSAGE: %s" %
                            (str(process_name), str(except_msg)))
                else:
                    # elif MPI.COMM_WORLD.rank == 0 and global_exit_value != 0
                    # An exception has been raised in task
                    message = " ".join((END_TASK_TAG,
                                        str(job_id),
                                        str(exit_value) + "\n"))

                if __debug__:
                    logger.debug("%s - END TASK MESSAGE: %s" %
                                 (str(process_name), str(message)))
                # The return message is:
                #
                # TaskResult ==> jobId exitValue D List<Object>
                #
                # Where List<Object> has D * 2 length:
                # D = #parameters == #task_parameters +
                #                    (has_target ? 1 : 0) +
                #                    #returns
                # And contains a pair of elements per parameter:
                #     - Parameter new type.
                #     - Parameter new value:
                #         - "null" if it is NOT a PSCO
                #         - PSCOId (String) if is a PSCO
                # Example:
                #     4 null 9 null 12 <pscoid>
                #
                # The order of the elements is: parameters + self + returns
                #
                # This is sent through the pipe with the END_TASK message.
                # If the task had an object or file as parameter and the
                # worker returns the id, the runtime can change the type
                # (and locations) to a EXTERNAL_OBJ_T.
            except Exception as e:
                logger.exception("%s - Exception %s" %
                                 (str(process_name), str(e)))
                exit_value = 7
                message = " ".join((END_TASK_TAG,
                                    str(job_id),
                                    str(exit_value) + "\n"))

            # Clean environment variables
            if __debug__:
                logger.debug("Cleaning environment.")
            del os.environ["COMPSS_HOSTNAMES"]

            # Restore loggers
            if __debug__:
                logger.debug("Restoring loggers.")
            logger.removeHandler(out_file_handler)
            logger.removeHandler(err_file_handler)
            for handler in logger_handlers:
                logger.addHandler(handler)

            if __debug__:
                logger.debug(
                    "[PYTHON EXECUTOR] [%s] Finished task with id: %s" %
                    (str(process_name), str(job_id)))
            # return SUCCESS_SIG,
            #        "{0} -- Task Ended Successfully!".format(str(process_name))
        else:
            if __debug__:
                logger.debug("[PYTHON EXECUTOR] [%s] Unexpected message: %s" %
                             (str(process_name), str(current_line)))
            exit_value = 7
            message = " ".join((END_TASK_TAG,
                                str(job_id),
                                str(exit_value) + "\n"))
    return exit_value, message
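
# A minimal sketch (job id, types and PSCO id are made up; the exact payload
# produced by build_return_params_message is assumed, not confirmed) of the
# END_TASK message assembled above, following the TaskResult layout
# documented in the comment block: D pairs of (new type, new value) ordered
# as parameters + self + returns.
def _sketch_end_task_message():  # pragma: no cover
    """Illustrative only: shape of a successful END_TASK notification."""
    job_id = "17"
    exit_value = 0
    # Assumed payload: D followed by D * 2 elements, here D = 3 with the
    # third parameter being a PSCO (cf. example "4 null 9 null 12 <pscoid>").
    params = "3 4 null 9 null 12 <pscoid>"
    return " ".join((END_TASK_TAG, job_id, str(exit_value), params + "\n"))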
def process_task(
        current_line,              # type: list
        process_name,              # type: str
        pipe,                      # type: Pipe
        queue,                     # type: typing.Optional[Queue]
        tracing,                   # type: bool
        logger,                    # type: typing.Any
        logger_cfg,                # type: str
        logger_handlers,           # type: list
        logger_level,              # type: int
        logger_formatter,          # type: typing.Any
        storage_conf,              # type: str
        storage_loggers,           # type: list
        storage_loggers_handlers,  # type: list
        cache_queue,               # type: typing.Optional[Queue]
        cache_ids,                 # type: typing.Any
        cache_profiler,            # type: bool
):
    # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_cfg: Logger configuration file.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers.
    :param cache_queue: Cache tracker communication queue.
    :param cache_ids: Cache proxy dictionary (read-only).
    :param cache_profiler: Cache profiler.
    :return: True if processed successfully, False otherwise.
    """
    with event_worker(PROCESS_TASK_EVENT):
        affinity_event_emit = False
        binded_cpus = False
        binded_gpus = False

        # CPU binding
        cpus = current_line[-3]
        if cpus != "-" and THREAD_AFFINITY:
            # The cpu affinity event is already emitted in Java.
            # Instead of emitting what we receive, we emit what we check
            # after setting the affinity.
            binded_cpus = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            emit_manual_event(int(gpus) + 1, inside=True, gpu_affinity=True)
            bind_gpus(gpus, process_name, logger)
            binded_gpus = True

        # Remove the last elements: cpu and gpu bindings
        current_line = current_line[0:-3]

        # task jobId command
        job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
        # current_line[4]  = <boolean> = tracing
        # current_line[5]  = <integer> = task id
        # current_line[6]  = <boolean> = debug
        # current_line[7]  = <string>  = storage conf.
        # current_line[8]  = <string>  = operation type (e.g. METHOD)
        # current_line[9]  = <string>  = module
        # current_line[10] = <string>  = method
        # current_line[11] = <string>  = time out
        # current_line[12] = <integer> = Number of slaves
        #                                (worker nodes) == #nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always "null")
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #  !---> type, stream, prefix, value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler.
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            for log_handler in storage_logger.handlers:
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            # From now onwards the log is in the job out and err files
            logger.debug("-" * 100)
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug("TASK CMD: %s" % str(current_line))

        try:
            # Check thread affinity
            if THREAD_AFFINITY:
                # The cpu affinity can be long if multiple cores have been
                # assigned. To avoid issues, we get just the first id.
                real_affinity = thread_affinity.getaffinity()
                cpus = str(real_affinity[0])
                num_cpus = len(real_affinity)
                emit_manual_event(int(cpus) + 1, inside=True,
                                  cpu_affinity=True)
                emit_manual_event(int(num_cpus), inside=True,
                                  cpu_number=True)
                affinity_event_emit = True
                if not binded_cpus:
                    logger.warning(
                        "This task is going to be executed with default thread affinity %s" %  # noqa: E501
                        str(real_affinity))

            # Setup process environment
            cn = int(current_line[12])
            cn_names = ",".join(current_line[13:13 + cn])
            cu = current_line[13 + cn]
            if __debug__:
                logger.debug("Process environment:")
                logger.debug("\t - Number of nodes: %s" % (str(cn)))
                logger.debug("\t - Hostnames: %s" % str(cn_names))
                logger.debug("\t - Number of threads: %s" % (str(cu)))
            setup_environment(cn, cn_names, cu)

            # Execute task
            result = execute_task(process_name, storage_conf,
                                  current_line[9:], tracing, logger,
                                  logger_cfg, (job_out, job_err), False,
                                  None, cache_queue, cache_ids,
                                  cache_profiler)
            # The ignored variable is timed_out
            exit_value, new_types, new_values, _, except_msg = result

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                message = build_successful_message(new_types, new_values,
                                                   job_id, exit_value)  # noqa: E501
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg, message = build_compss_exception_message(
                    except_msg, job_id)  # noqa: E501
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name),
                         str(pipe.output_pipe),
                         str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = build_exception_message(job_id, exit_value)
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))

            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - "null" if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.
        except Exception as e:
            logger.exception("%s - Exception %s" %
                             (str(process_name), str(e)))
            if queue:
                queue.put("EXCEPTION")
            # Stop the worker process
            return False

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        clean_environment(binded_cpus, binded_gpus)
        if affinity_event_emit:
            emit_manual_event(0, inside=True, cpu_affinity=True)
            emit_manual_event(0, inside=True, cpu_number=True)
        if binded_gpus:
            emit_manual_event(0, inside=True, gpu_affinity=True)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
            logger.debug("-" * 100)
            # No more logs in job out and err files
        # Restore worker log
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        logger.handlers = []
        for handler in logger_handlers:
            logger.addHandler(handler)
        i = 0
        for storage_logger in storage_loggers:
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            storage_logger.handlers = []
            for handler in storage_loggers_handlers[i]:
                storage_logger.addHandler(handler)
            i += 1

        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

        # Notify the runtime that the task has finished
        pipe.write(message)

    return True
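
# A minimal stdlib sketch (analogue only; the code above uses the
# thread_affinity extension, not this call) of the "check thread affinity"
# step: read back the effective CPU mask after binding and keep the first
# core id for the tracing event.
def _sketch_affinity_check():  # pragma: no cover
    """Illustrative only: Linux-only os.sched_getaffinity analogue."""
    real_affinity = sorted(os.sched_getaffinity(0))  # usable core ids
    first_cpu = real_affinity[0]   # only the first id is reported
    num_cpus = len(real_affinity)  # number of cores bound to this process
    return first_cpu, num_cpus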
def executor(queue, process_name, pipe, conf):
    # type: (typing.Union[None, Queue], str, Pipe, typing.Any) -> None
    """ Thread main body - Overrides Threading run method.

    Iterates over the input pipe in order to receive tasks (with their
    parameters) and process them.
    Notifies the runtime when each task has finished with the
    corresponding output value.
    Finishes when the "quit" message is received.

    :param queue: Queue where to put exception messages.
    :param process_name: Process name (Thread-X, where X is the thread id).
    :param pipe: Pipe to receive and send messages from/to the runtime.
    :param conf: Configuration of the executor.
    :return: None
    """
    try:
        # Replace Python Worker's SIGTERM handler.
        signal.signal(signal.SIGTERM, shutdown_handler)

        if len(conf.logger.handlers) == 0:
            # Logger has not been inherited correctly. Happens on macOS.
            set_temporary_directory(conf.tmp_dir, create_tmpdir=False)
            # Reload logger
            conf.logger, conf.logger_cfg, conf.storage_loggers, _ = \
                load_loggers(conf.debug, conf.persistent_storage)
            # Set the binding in worker mode too
            context.set_pycompss_context(context.WORKER)

        logger = conf.logger
        tracing = conf.tracing
        storage_conf = conf.storage_conf
        storage_loggers = conf.storage_loggers

        # Get a copy of the necessary information from the logger to
        # re-establish after each task
        logger_handlers = copy.copy(logger.handlers)
        logger_level = logger.getEffectiveLevel()
        logger_formatter = logging.Formatter(
            logger_handlers[0].formatter._fmt)  # noqa
        storage_loggers_handlers = []
        for storage_logger in storage_loggers:
            storage_loggers_handlers.append(
                copy.copy(storage_logger.handlers))

        # Establish link with the binding-commons to enable task nesting
        if __debug__:
            logger.debug(HEADER +
                         "Establishing link with runtime in process " +
                         str(process_name))
        COMPSs.load_runtime(external_process=False, _logger=logger)
        COMPSs.set_pipes(pipe.output_pipe, pipe.input_pipe)

        if storage_conf != "null":
            try:
                from storage.api import initWorkerPostFork  # noqa
                with event_worker(INIT_WORKER_POSTFORK_EVENT):
                    initWorkerPostFork()
            except (ImportError, AttributeError):
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find initWorkerPostFork storage call. Ignoring it." %  # noqa: E501
                        str(process_name))

        # Start the streaming backend if necessary
        streaming = False
        if conf.stream_backend not in [None, "null", "NONE"]:
            streaming = True
        if streaming:
            # Initialize streaming
            logger.debug(HEADER + "Starting streaming for process " +
                         str(process_name))
            try:
                DistroStreamClientHandler.init_and_start(
                    master_ip=conf.stream_master_ip,
                    master_port=conf.stream_master_port)
            except Exception as e:
                logger.error(e)
                raise e

        # Connect to shared memory manager
        if conf.cache_queue:
            load_shared_memory_manager()

        # Process properties
        alive = True

        if __debug__:
            logger.debug(HEADER + "[%s] Starting process" %
                         str(process_name))

        # MAIN EXECUTOR LOOP
        while alive:
            # Runtime -> pipe - Read command from pipe
            command = COMPSs.read_pipes()
            if command != "":
                if __debug__:
                    logger.debug(HEADER + "[%s] Received command %s" %
                                 (str(process_name), str(command)))
                # Process the command
                alive = process_message(command,
                                        process_name,
                                        pipe,
                                        queue,
                                        tracing,
                                        logger,
                                        conf.logger_cfg,
                                        logger_handlers,
                                        logger_level,
                                        logger_formatter,
                                        storage_conf,
                                        storage_loggers,
                                        storage_loggers_handlers,
                                        conf.cache_queue,
                                        conf.cache_ids,
                                        conf.cache_profiler)

        # Stop storage
        if storage_conf != "null":
            try:
                from storage.api import finishWorkerPostFork  # noqa
                with event_worker(FINISH_WORKER_POSTFORK_EVENT):
                    finishWorkerPostFork()
            except (ImportError, AttributeError):
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find finishWorkerPostFork storage call. Ignoring it." %  # noqa: E501
                        str(process_name))

        # Stop streaming
        if streaming:
            logger.debug(HEADER + "Stopping streaming for process " +
                         str(process_name))
            DistroStreamClientHandler.set_stop()

        sys.stdout.flush()
        sys.stderr.flush()
        if __debug__:
            logger.debug(HEADER + "[%s] Exiting process" %
                         str(process_name))
        pipe.write(QUIT_TAG)
        pipe.close()
    except Exception as e:
        logger.error(e)
        raise e