def partition_by(self, partitioner_func=default_hash, num_of_partitions=-1):
    """
    Regroup the elements of this DDS into new partitions.

    Every current partition is scattered into `nop` buckets by
    `distribute_partition`. The i-th buckets produced from all the source
    partitions are then gathered and flattened into the i-th partition of
    the returned DDS.

    :param partitioner_func: Function forwarded to `distribute_partition`
                             to spread the data over the buckets (for
                             example, a hash function).
    :param num_of_partitions: Number of partitions to be created. The
                              default (-1) keeps the current amount.
    :return: DDS loaded with the regrouped partitions.

    >>> dds = DDS().load(range(6)).map(lambda x: (x, x))
    >>> dds.partition_by(num_of_partitions=3).collect(True)
    [[(0, 0), (3, 3)], [(1, 1), (4, 4)], [(2, 2), (5, 5)]]
    """

    def combine_lists(_partition):
        # A new partition arrives as one bucket per former partition:
        # flatten those buckets into a single list.
        return [item for bucket in _partition for item in bucket]

    if num_of_partitions == -1:
        nop = len(self.partitions)
    else:
        nop = num_of_partitions

    grouped = defaultdict(list)
    from_collections = self.paac

    for source in self.partitions:
        buckets = [[] for _ in range(nop)]
        with event(3002, master=True):
            if from_collections:
                distribute_partition(buckets, self.func, partitioner_func,
                                     None, source)
            else:
                distribute_partition(buckets, self.func, partitioner_func,
                                     source)
        if from_collections:
            cdo(source)
        for index in range(nop):
            grouped[index].append(buckets[index])

    future_partitions = [grouped[key] for key in sorted(grouped.keys())]

    regrouped = DDS().load(future_partitions, -1, True)
    return regrouped.map_partitions(combine_lists)
def start_runtime(log_level='off', tracing=0, interactive=False):
    # type: (str, int, bool) -> None
    """ Boots the COMPSs runtime.

    Loads the runtime through the COMPSs external library (as an external
    process only when running interactively in the master), enables the
    extension debug and the object tracker report for the 'trace' log
    level, and finally starts the runtime. The load/start sequence is
    wrapped in the START_RUNTIME_EVENT event.

    :param log_level: Log level [ 'trace' | 'debug' | 'info' | 'api' | 'off' ].
    :param tracing: Tracing level [0 (deactivated) | 1 (basic) | 2 (advanced)].
    :param interactive: Boolean if interactive (ipython or jupyter).
    :return: None
    """
    if __debug__:
        logger.info("Starting COMPSs...")

    if tracing > 0:
        if not interactive:
            # Master tracing is only enabled outside interactive sessions
            # (extrae issues within jupyter).
            enable_trace_master()

    with event(START_RUNTIME_EVENT, master=True):
        as_external_process = bool(interactive and context.in_master())
        COMPSs.load_runtime(external_process=as_external_process)

        if log_level == 'trace':
            # 'debug' or True could also qualify, but the C extension debug
            # is only shown in the maximum tracing level.
            COMPSs.set_debug(True)
            OT_enable_report()

        COMPSs.start_runtime()

    if __debug__:
        logger.info("COMPSs started")
def collect(self, keep_partitions=False, future_objects=False):
    """
    Gather the elements stored in every partition.

    If a function is pending (`self.func`), it is applied to each partition
    through `map_partition` and then reset. Otherwise the partitions are
    used as they are, except partition generators, whose data is retrieved.

    :param keep_partitions: When True, the result is a list with one list
                            per partition; otherwise a single flat list.
    :param future_objects: When True, the per-partition results are
                           returned right away, without waiting for them
                           or flattening them.
    :return: List with the collected elements.

    >>> dds = DDS().load(range(10), 2)
    >>> dds.collect(True)
    [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
    >>> DDS().load(range(10), 2).collect()
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    """
    pending = []

    if not self.func:
        for part in self.partitions:
            if isinstance(part, IPartitionGenerator):
                pending.append(part.retrieve_data())
            else:
                pending.append(part)
    else:
        unpack_collections = self.paac
        for part in self.partitions:
            with event(3001, master=True):
                if unpack_collections:
                    pending.append(map_partition(self.func, None, *part))
                else:
                    pending.append(map_partition(self.func, part))
        # The function has been consumed: reset it!
        self.func = None

    # Future objects cannot be extended for now...
    if future_objects:
        return pending

    resolved = cwo(pending)

    if keep_partitions:
        return [list(chunk) for chunk in resolved]

    flat = []
    for chunk in resolved:
        flat.extend(chunk)
    return flat
def compss_persistent_executor(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Main function of a persistent executor.

    Reports its pid through COMM, installs the signal handlers, sets the
    worker context, initializes the storage (if any) and runs the executor
    body on the pipe assigned to this rank. The storage is finished once
    the executor returns.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    COMM.gather(str(os.getpid()), root=0)

    # SIGTERM is sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)
    # SIGUSR2 is caught to solve strange behaviour with mpi4py
    signal.signal(signal.SIGUSR2, user_signal_handler)

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    persistent_storage = config.storage_conf != 'null'

    loggers = load_loggers(config.debug, persistent_storage)
    logger, logger_cfg, storage_loggers = loggers

    if persistent_storage:
        # Initialize storage
        with event(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    conf = ExecutorConf(TRACING,
                        config.storage_conf,
                        logger,
                        logger_cfg,
                        storage_loggers,
                        config.stream_backend,
                        config.stream_master_name,
                        config.stream_master_port,
                        CACHE_IDS,
                        CACHE_QUEUE)
    # No exception queue is used here; ranks are mapped onto the pipes list
    # with an offset of one (RANK - 1).
    executor(None, "Rank-" + str(RANK), config.pipes[RANK - 1], conf)

    if not persistent_storage:
        return

    # Finish storage
    if __debug__:
        logger.debug(HEADER + "Stopping persistent storage")
    with event(FINISH_STORAGE_AT_WORKER_EVENT):
        from storage.api import finishWorker as finishStorageAtWorker  # noqa
        finishStorageAtWorker()
def main():
    """ Entry point of the GAT worker.

    Reads the task description from sys.argv, sets up streaming, logging
    and persistent storage as requested, runs the task through
    compss_worker and exits with code 1 if the task reported exit code 1.

    Command line layout (as read by this function):
        sys.argv[1]  tracing ('true' enables it)
        sys.argv[2]  task id (integer)
        sys.argv[3]  log level
        sys.argv[4]  storage configuration ('null' means no storage)
        sys.argv[5]  stream backend (None, 'null' or 'NONE' mean disabled)
        sys.argv[6]  stream master name
        sys.argv[7]  stream master port
        sys.argv[8]  method type (unused here)
        sys.argv[9:] task parameters handed to compss_worker:
            class_name = sys.argv[9]
            method_name = sys.argv[10]
            num_slaves = sys.argv[11]
            i = 11 + num_slaves
            slaves = sys.argv[11..i]
            numCus = sys.argv[i+1]
            has_target = sys.argv[i+2] == 'true'
            num_params = int(sys.argv[i+3])
            params = sys.argv[i+4..]

    :return: None
    """
    # Emit sync event if tracing is enabled
    tracing = sys.argv[1] == 'true'
    task_id = int(sys.argv[2])
    log_level = sys.argv[3]
    storage_conf = sys.argv[4]
    stream_backend = sys.argv[5]
    stream_master_name = sys.argv[6]
    stream_master_port = sys.argv[7]
    params = sys.argv[9:]

    print("tracing = " + str(tracing))
    print("task_id = " + str(task_id))
    print("log_level = " + str(log_level))
    print("storage_conf = " + str(storage_conf))

    persistent_storage = storage_conf != 'null'
    streaming = stream_backend not in [None, 'null', 'NONE']

    with trace_multiprocessing_worker() if tracing else dummy_context():

        if streaming:
            # Start streaming
            DistroStreamClientHandler.init_and_start(
                master_ip=stream_master_name,
                master_port=stream_master_port)

        # Load log level configuration file
        worker_path = os.path.dirname(os.path.realpath(__file__))
        if log_level == 'true' or log_level == "debug":
            # Debug
            log_cfg = '/../../../log/logging_gat_worker_debug.json'
        elif log_level == "info" or log_level == "off":
            # Info or no debug
            log_cfg = '/../../../log/logging_gat_worker_off.json'
        else:
            # Default
            log_cfg = '/../../../log/logging_gat_worker.json'
        init_logging_worker(worker_path + log_cfg, tracing)

        if persistent_storage:
            # Initialize storage
            with event(INIT_STORAGE_AT_WORKER_EVENT):
                from storage.api import initWorker as initStorageAtWorker
                initStorageAtWorker(config_file_path=storage_conf)

        # Init worker
        exit_code = compss_worker(tracing,
                                  str(task_id),
                                  storage_conf,
                                  params)

        if streaming:
            # Finish streaming
            DistroStreamClientHandler.set_stop()

        if persistent_storage:
            # Finish storage
            with event(FINISH_STORAGE_AT_WORKER_EVENT):
                from storage.api import finishWorker as finishStorageAtWorker
                finishStorageAtWorker()

    if exit_code == 1:
        # sys.exit instead of the builtin exit(): the latter is injected by
        # the site module for interactive use and may not be available.
        sys.exit(1)
def launch_pycompss_application(
        app,
        func,
        log_level='off',  # type: str
        o_c=False,  # type: bool
        debug=False,  # type: bool
        graph=False,  # type: bool
        trace=False,  # type: bool
        monitor=None,  # type: int
        project_xml=None,  # type: str
        resources_xml=None,  # type: str
        summary=False,  # type: bool
        task_execution='compss',  # type: str
        storage_impl=None,  # type: str
        storage_conf=None,  # type: str
        streaming_backend=None,  # type: str
        streaming_master_name=None,  # type: str
        streaming_master_port=None,  # type: str
        task_count=50,  # type: int
        app_name=None,  # type: str
        uuid=None,  # type: str
        base_log_dir=None,  # type: str
        specific_log_dir=None,  # type: str
        extrae_cfg=None,  # type: str
        comm='NIO',  # type: str
        conn='es.bsc.compss.connectors.DefaultSSHConnector',  # noqa: E501
        master_name='',  # type: str
        master_port='',  # type: str
        scheduler='es.bsc.compss.scheduler.loadbalancing.LoadBalancingScheduler',  # noqa: E501
        jvm_workers='-Xms1024m,-Xmx1024m,-Xmn400m',
        cpu_affinity='automatic',  # type: str
        gpu_affinity='automatic',  # type: str
        fpga_affinity='automatic',  # type: str
        fpga_reprogram='',  # type: str
        profile_input='',  # type: str
        profile_output='',  # type: str
        scheduler_config='',  # type: str
        external_adaptation=False,  # type: bool
        propagate_virtual_environment=True,  # type: bool
        mpi_worker=False,  # type: bool
        *args, **kwargs):
    # type: (...) -> None
    """ Launch PyCOMPSs application from function.

    Checks the flags, prepares the environment and the init configuration
    file, starts the runtime, the storage and the streaming, runs the
    application (the whole file when func is None or '__main__', otherwise
    the function named func loaded from app), and finally stops streaming,
    storage and the runtime. While the application runs sys.argv is
    replaced by args; it is restored afterwards.

    :param app: Application path
    :param func: Function
    :param log_level: Logging level [ 'trace'|'debug'|'info'|'api'|'off' ]
                      (default: 'off')
    :param o_c: Objects to string conversion [ True | False ] (default: False)
    :param debug: Debug mode [ True | False ] (default: False)
                  (overrides log_level)
    :param graph: Generate graph [ True | False ] (default: False)
    :param trace: Generate trace
                  [ True | False | 'scorep' | 'arm-map' | 'arm-ddt']
                  (default: False)
    :param monitor: Monitor refresh rate (default: None)
    :param project_xml: Project xml file path
    :param resources_xml: Resources xml file path
    :param summary: Execution summary [ True | False ] (default: False)
    :param task_execution: Task execution (default: 'compss')
    :param storage_impl: Storage implementation path
    :param storage_conf: Storage configuration file path
    :param streaming_backend: Streaming backend (default: None)
    :param streaming_master_name: Streaming master name (default: None)
    :param streaming_master_port: Streaming master port (default: None)
    :param task_count: Task count (default: 50)
    :param app_name: Application name (default: Interactive_date)
    :param uuid: UUId
    :param base_log_dir: Base logging directory
    :param specific_log_dir: Specific logging directory
    :param extrae_cfg: Extrae configuration file path
    :param comm: Communication library (default: NIO)
    :param conn: Connector (default: DefaultSSHConnector)
    :param master_name: Master Name (default: '')
    :param master_port: Master port (default: '')
    :param scheduler: Scheduler (default:
                  es.bsc.compss.scheduler.loadbalancing.LoadBalancingScheduler)
    :param jvm_workers: Java VM parameters
                        (default: '-Xms1024m,-Xmx1024m,-Xmn400m')
    :param cpu_affinity: CPU Core affinity (default: 'automatic')
    :param gpu_affinity: GPU Core affinity (default: 'automatic')
    :param fpga_affinity: FPGA Core affinity (default: 'automatic')
    :param fpga_reprogram: FPGA reprogram command (default: '')
    :param profile_input: Input profile  (default: '')
    :param profile_output: Output profile  (default: '')
    :param scheduler_config: Scheduler configuration  (default: '')
    :param external_adaptation: External adaptation [ True | False ]
                                (default: False)
    :param propagate_virtual_environment: Propagate virtual environment
                                          [ True | False ] (default: True)
    :param mpi_worker: Use the MPI worker [ True | False ] (default: False)
    :param args: Positional arguments
    :param kwargs: Named arguments
    :return: Execution result: the value returned by func, or None when the
             whole file is executed or the flag check fails.
    """
    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start, compss_stop

    ##############################################################
    # INITIALIZATION
    ##############################################################

    # Initial dictionary with the user defined parameters
    all_vars = {
        'log_level': log_level,
        'debug': debug,
        'o_c': o_c,
        'graph': graph,
        'trace': trace,
        'monitor': monitor,
        'project_xml': project_xml,
        'resources_xml': resources_xml,
        'summary': summary,
        'task_execution': task_execution,
        'storage_impl': storage_impl,
        'storage_conf': storage_conf,
        'streaming_backend': streaming_backend,
        'streaming_master_name': streaming_master_name,
        'streaming_master_port': streaming_master_port,
        'task_count': task_count,
        'app_name': app_name,
        'uuid': uuid,
        'base_log_dir': base_log_dir,
        'specific_log_dir': specific_log_dir,
        'extrae_cfg': extrae_cfg,
        'comm': comm,
        'conn': conn,
        'master_name': master_name,
        'master_port': master_port,
        'scheduler': scheduler,
        'jvm_workers': jvm_workers,
        'cpu_affinity': cpu_affinity,
        'gpu_affinity': gpu_affinity,
        'fpga_affinity': fpga_affinity,
        'fpga_reprogram': fpga_reprogram,
        'profile_input': profile_input,
        'profile_output': profile_output,
        'scheduler_config': scheduler_config,
        'external_adaptation': external_adaptation,
        'propagate_virtual_environment': propagate_virtual_environment,
        'mpi_worker': mpi_worker
    }

    # Check the provided flags
    flags, issues = check_flags(all_vars)
    if not flags:
        print_flag_issues(issues)
        return None

    # Prepare the environment
    env_vars = prepare_environment(False, o_c, storage_impl, app,
                                   debug, trace, mpi_worker)
    all_vars.update(env_vars)

    monitoring_vars = prepare_loglevel_graph_for_monitoring(
        monitor, graph, debug, log_level)
    all_vars.update(monitoring_vars)

    if RUNNING_IN_SUPERCOMPUTER:
        updated_vars = updated_variables_in_sc()
        all_vars.update(updated_vars)

    to_update = prepare_tracing_environment(all_vars['trace'],
                                            all_vars['extrae_lib'],
                                            all_vars['ld_library_path'])
    all_vars['trace'], all_vars['ld_library_path'] = to_update

    inf_vars = check_infrastructure_variables(all_vars['project_xml'],
                                              all_vars['resources_xml'],
                                              all_vars['compss_home'],
                                              all_vars['app_name'],
                                              all_vars['file_name'],
                                              all_vars['external_adaptation'])
    all_vars.update(inf_vars)

    create_init_config_file(**all_vars)

    ##############################################################
    # RUNTIME START
    ##############################################################

    # Runtime start
    compss_start(log_level, all_vars['trace'], True)

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(all_vars['compss_home'],
                            'Bindings',
                            'python',
                            str(all_vars['major_version']),
                            'log')
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    logger = logging.getLogger("pycompss.runtime.launch")

    logger.debug('--- START ---')
    logger.debug('PyCOMPSs Log path: %s' % log_path)

    logger.debug("Starting storage")
    persistent_storage = master_init_storage(all_vars['storage_conf'], logger)

    logger.debug("Starting streaming")
    streaming = init_streaming(all_vars['streaming_backend'],
                               all_vars['streaming_master_name'],
                               all_vars['streaming_master_port'])

    # The application must only see its own arguments
    saved_argv = sys.argv
    sys.argv = args
    try:
        # Execution:
        with event(APPLICATION_RUNNING_EVENT, master=True):
            if func is None or func == '__main__':
                if IS_PYTHON3:
                    # Read the file within a context manager so that the
                    # handle is closed before the application runs.
                    with open(app) as app_file:
                        app_source = app_file.read()
                    exec(app_source)
                else:
                    execfile(app)  # noqa
                result = None
            else:
                if IS_PYTHON3:
                    import importlib.util
                    spec = importlib.util.spec_from_file_location(
                        all_vars['file_name'], app)  # noqa: E501
                    imported_module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(imported_module)  # noqa
                else:
                    import imp  # noqa
                    imported_module = imp.load_source(
                        all_vars['file_name'], app)  # noqa
                method_to_call = getattr(imported_module, func)
                result = method_to_call(*args, **kwargs)
    finally:
        # Recover the system arguments (also if the application raised)
        sys.argv = saved_argv

    # Stop streaming
    if streaming:
        stop_streaming()

    # Stop persistent storage
    if persistent_storage:
        master_stop_storage(logger)

    logger.debug('--- END ---')

    ##############################################################
    # RUNTIME STOP
    ##############################################################

    # Stop runtime
    compss_stop()

    return result
def compss_main():
    # type: () -> None
    """ PyCOMPSs main function.

    General call:
    python $PYCOMPSS_HOME/pycompss/runtime/launch.py $log_level
           $PyObject_serialize $storage_conf $streaming_backend
           $streaming_master_name $streaming_master_port
           $fullAppPath $application_args

    Parses the launcher arguments (the first 9 entries of sys.argv), leaves
    only the application path and the user arguments in sys.argv, starts
    the runtime, storage and streaming, executes the application file and
    always stops the runtime and exits with the resulting exit code.

    Exit code: the code of a non-zero SystemExit raised by the application,
    1 on serialization or generic exceptions, 0 otherwise (including
    COMPSsException).

    :return: None
    """
    global APP_PATH

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start, compss_stop

    # See parse_arguments, defined above
    # In order to avoid parsing user arguments, we are going to remove user
    # args from sys.argv
    user_sys_argv = sys.argv[9:]
    sys.argv = sys.argv[:9]
    args = parse_arguments()
    # We are done, now sys.argv must contain user args only
    sys.argv = [args.app_path] + user_sys_argv

    # Get log_level
    log_level = args.log_level

    # Setup tracing
    tracing = int(args.tracing)

    # Start the runtime
    compss_start(log_level, tracing, False)

    # Get object_conversion boolean
    set_object_conversion(args.object_conversion == 'true')

    # Get storage configuration at master
    storage_conf = args.storage_configuration

    # Get application execution path
    APP_PATH = args.app_path

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(os.getenv('COMPSS_HOME'),
                            'Bindings',
                            'python',
                            str(_PYTHON_VERSION),
                            'log')
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    logger = logging.getLogger("pycompss.runtime.launch")

    # Get JVM options
    # jvm_opts = os.environ['JVM_OPTIONS_FILE']
    # from pycompss.util.jvm.parser import convert_to_dict
    # opts = convert_to_dict(jvm_opts)
    # storage_conf = opts.get('-Dcompss.storage.conf')

    exit_code = 0
    try:
        if __debug__:
            logger.debug('--- START ---')
            logger.debug('PyCOMPSs Log path: %s' % binding_log_path)

        # Start persistent storage
        persistent_storage = master_init_storage(storage_conf, logger)

        # Start streaming
        streaming = init_streaming(args.streaming_backend,
                                   args.streaming_master_name,
                                   args.streaming_master_port)

        # Show module warnings
        if __debug__:
            show_optional_module_warnings()

        # MAIN EXECUTION
        with event(APPLICATION_RUNNING_EVENT, master=True):
            if IS_PYTHON3:
                with open(APP_PATH) as f:
                    exec(compile(f.read(), APP_PATH, 'exec'), globals())
            else:
                execfile(APP_PATH, globals())  # MAIN EXECUTION

        # Stop streaming
        if streaming:
            stop_streaming()

        # Stop persistent storage
        if persistent_storage:
            master_stop_storage(logger)

        # End
        if __debug__:
            logger.debug('--- END ---')
    except SystemExit as e:
        if e.code != 0:
            print('[ ERROR ]: User program ended with exitcode %s.' % e.code)
            print('\t\tShutting down runtime...')
            exit_code = e.code
    except SerializerException:
        # If an object that can not be serialized has been used as a parameter.
        exit_code = 1
        print("[ ERROR ]: Serialization exception")
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        for line in lines:
            if APP_PATH in line:
                # Only the frames that belong to the user application
                print('[ ERROR ]: In: %s' % line)
    except COMPSsException as e:
        # A COMPSs exception reached the main program
        print("[ ERROR ]: A COMPSs exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 0  # COMPSs exception is not considered an error
    except Exception as e:
        # Any other exception occurred
        print("[ ERROR ]: An exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 1
    finally:
        # Stop runtime
        compss_stop(exit_code)
        sys.stdout.flush()
        sys.stderr.flush()
        sys.exit(exit_code)
def __decorator_body__(self, user_function, args, kwargs):
    """ Dispatch a call to the decorated function depending on the context.

    - In the master, or in a worker with nesting enabled and no
      "compss_key" keyword, the call goes through a new TaskMaster and the
      first element of its result is returned (the remaining elements
      refresh the attributes of this decorator).
    - In a worker with the "compss_key" keyword, the call goes through a
      TaskWorker and its result is returned.
    - Otherwise the function is executed sequentially.

    :param user_function: Decorated function.
    :param args: Positional arguments of the call.
    :param kwargs: Keyword arguments of the call.
    :return: Result of the selected execution path.
    """

    def call_through_task_master():
        # Each task will have a TaskMaster, so its content will
        # not be shared.
        with event(TASK_INSTANTIATION, master=True):
            task_master = TaskMaster(self.decorator_arguments,
                                     self.user_function,
                                     self.core_element,
                                     self.registered,
                                     self.signature,
                                     self.interactive,
                                     self.module,
                                     self.function_arguments,
                                     self.function_name,
                                     self.module_name,
                                     self.function_type,
                                     self.class_name,
                                     self.hints,
                                     self.on_failure,
                                     self.defaults)
        outcome = task_master.call(*args, **kwargs)
        (future_object,
         self.core_element,
         self.registered,
         self.signature,
         self.interactive,
         self.module,
         self.function_arguments,
         self.function_name,
         self.module_name,
         self.function_type,
         self.class_name,
         self.hints) = outcome
        del task_master
        return future_object

    # Determine the context and decide what to do
    if context.in_master():
        # @task being executed in the master
        self.__check_core_element__(kwargs, user_function)
        return call_through_task_master()

    if not context.in_worker():
        # We are neither in master nor in the worker, or the user has
        # stopped the interactive session.
        # Therefore, the user code is being executed with no
        # launch_compss/enqueue_compss/runcompss/interactive session
        return self._sequential_call(*args, **kwargs)

    if "compss_key" in kwargs:
        if context.is_nesting_enabled():
            # Update the whole logger since it will be in job out/err
            update_logger_handlers(kwargs["compss_log_cfg"],
                                   kwargs["compss_log_files"][0],
                                   kwargs["compss_log_files"][1])
        # @task being executed in the worker
        with event(WORKER_TASK_INSTANTIATION, master=False, inside=True):
            task_worker = TaskWorker(self.decorator_arguments,
                                     self.user_function,
                                     self.on_failure,
                                     self.defaults)
        result = task_worker.call(*args, **kwargs)
        # Force flush stdout and stderr
        sys.stdout.flush()
        sys.stderr.flush()
        # Remove worker
        del task_worker
        if context.is_nesting_enabled():
            # Wait for all nested tasks to finish
            from pycompss.runtime.binding import nested_barrier
            nested_barrier()
            # Reestablish logger handlers
            update_logger_handlers(kwargs["compss_log_cfg"])
        return result

    if context.is_nesting_enabled():
        # Nested @task executed in the worker
        return call_through_task_master()

    # Called from another task within the worker
    # Ignore the @task decorator and run it sequentially
    message = "".join(("WARNING: Calling task: ",
                       str(user_function.__name__),
                       " from this task.\n",
                       "         It will be executed sequentially ",  # noqa: E501
                       "within the caller task."))
    print(message, file=sys.stderr)
    return self._sequential_call(*args, **kwargs)
def compss_persistent_worker(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Persistent worker main function.

    Retrieves the initial configuration, starts the storage and the cache
    (when enabled), spawns one executor process per configured pipe and
    then serves the commands received through the control pipe (add, query,
    cancel and remove executors, ping, quit). After the quit command it
    waits for the executor processes, logs any message left in their
    exception queues and releases queues, cache and storage.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    global CACHE
    global CACHE_PROCESS

    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)

    # Set the binding in worker mode
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != 'null')

    logger, logger_cfg, storage_loggers = load_loggers(config.debug,
                                                       persistent_storage)

    if __debug__:
        logger.debug(HEADER + "piper_worker.py wake up")
        config.print_on_logger(logger)

    if persistent_storage:
        # Initialize storage
        logger.debug(HEADER + "Starting persistent storage")
        with event(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    # Exception queues of the executor processes
    queues = []

    # Setup cache
    if is_cache_enabled(config.cache):
        # Deploy the necessary processes
        CACHE = True
        cache_params = start_cache(logger, config.cache)
    else:
        # No cache
        CACHE = False
        cache_params = (None, None, None, None)
    smm, CACHE_PROCESS, cache_queue, cache_ids = cache_params

    # Create new executor processes
    conf = ExecutorConf(TRACING,
                        config.storage_conf,
                        logger,
                        logger_cfg,
                        storage_loggers,
                        config.stream_backend,
                        config.stream_master_name,
                        config.stream_master_port,
                        cache_ids,
                        cache_queue)

    for i in range(0, config.tasks_x_node):
        if __debug__:
            logger.debug(HEADER + "Launching process " + str(i))
        process_name = "".join(("Process-", str(i)))
        pid, queue = create_executor_process(process_name,
                                             conf,
                                             config.pipes[i])
        queues.append(queue)

    # Read command from control pipe
    alive = True
    process_counter = config.tasks_x_node
    control_pipe = config.control_pipe
    while alive:
        command = control_pipe.read_command(retry_period=1)
        if command != "":
            line = command.split()

            if line[0] == ADD_EXECUTOR_TAG:
                process_name = "".join(("Process-", str(process_counter)))
                process_counter = process_counter + 1
                in_pipe = line[1]
                out_pipe = line[2]
                pipe = Pipe(in_pipe, out_pipe)
                pid, queue = create_executor_process(process_name,
                                                     conf,
                                                     pipe)
                queues.append(queue)
                control_pipe.write(" ".join((ADDED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))

            elif line[0] == QUERY_EXECUTOR_ID_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.get(in_pipe)
                pid = proc.pid
                control_pipe.write(" ".join((REPLY_EXECUTOR_ID_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))

            elif line[0] == CANCEL_TASK_TAG:
                in_pipe = line[1]
                proc = PROCESSES.get(in_pipe)
                pid = proc.pid
                if __debug__:
                    logger.debug(HEADER + "Signaling process with PID " +
                                 str(pid) + " to cancel a task")
                os.kill(pid, signal.SIGUSR2)  # NOSONAR cancellation produced by COMPSs

            elif line[0] == REMOVE_EXECUTOR_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.pop(in_pipe, None)
                if proc:
                    if proc.is_alive():
                        logger.warning(HEADER + "Forcing terminate on : " +
                                       proc.name)
                        proc.terminate()
                    proc.join()
                control_pipe.write(" ".join((REMOVED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe)))

            elif line[0] == PING_TAG:
                control_pipe.write(PONG_TAG)

            elif line[0] == QUIT_TAG:
                alive = False

    # Wait for all processes
    for proc in PROCESSES.values():
        proc.join()

    # Check if there is any exception message from the executors.
    # NOTE: empty must be *called*; the bare bound method is always truthy,
    # which silently disabled this check. All the queues are inspected,
    # including the ones of executors added at runtime.
    for executor_queue in queues:
        if not executor_queue.empty():
            logger.error(HEADER + "Exception in threads queue: " +
                         str(executor_queue.get()))

    for queue in queues:
        queue.close()
        queue.join_thread()

    if CACHE:
        stop_cache(smm, cache_queue, CACHE_PROCESS)  # noqa

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker  # noqa
            finishStorageAtWorker()

    if __debug__:
        logger.debug(HEADER + "Finished")

    control_pipe.write(QUIT_TAG)
    control_pipe.close()
def executor(queue, process_name, pipe, conf):
    # type: (..., str, Pipe, ...) -> None
    """ Executor main body.

    Links with the runtime, initializes storage and streaming when they are
    configured, and then iterates over the input pipe in order to receive
    commands and process them with process_message until it reports that
    the executor must stop. Before returning it finishes storage and
    streaming and writes the quit tag to the pipe.

    Any exception is logged and re-raised.

    :param queue: Queue where to put exception messages (forwarded to
                  process_message).
    :param process_name: Process name.
    :param pipe: Pipe to receive and send messages from/to the runtime.
    :param conf: configuration of the executor.
    :return: None
    """
    logger = conf.logger
    try:
        # Replace Python Worker's SIGTERM handler.
        signal.signal(signal.SIGTERM, shutdown_handler)

        tracing = conf.tracing
        storage_conf = conf.storage_conf
        storage_loggers = conf.storage_loggers

        # Get a copy of the necessary information from the logger to
        # re-establish after each task
        logger_handlers = copy.copy(logger.handlers)
        logger_level = logger.getEffectiveLevel()
        logger_formatter = logging.Formatter(
            logger_handlers[0].formatter._fmt)  # noqa
        storage_loggers_handlers = [copy.copy(storage_logger.handlers)
                                    for storage_logger in storage_loggers]

        # Establish link with the binding-commons to enable task nesting
        if __debug__:
            logger.debug(HEADER +
                         "Establishing link with runtime in process " +
                         str(process_name))  # noqa: E501
        COMPSs.load_runtime(external_process=False, _logger=logger)
        COMPSs.set_pipes(pipe.output_pipe, pipe.input_pipe)

        if storage_conf != 'null':
            try:
                from storage.api import initWorkerPostFork  # noqa
                with event(INIT_WORKER_POSTFORK_EVENT):
                    initWorkerPostFork()
            except ImportError:
                # The post fork call is optional in the storage API
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find initWorkerPostFork storage call. Ignoring it." %  # noqa: E501
                        str(process_name))

        # Start the streaming backend if necessary
        streaming = conf.stream_backend not in [None, 'null', 'NONE']

        if streaming:
            # Initialize streaming
            logger.debug(HEADER + "Starting streaming for process " +
                         str(process_name))
            try:
                DistroStreamClientHandler.init_and_start(
                    master_ip=conf.stream_master_ip,
                    master_port=int(conf.stream_master_port))
            except Exception as e:
                logger.error(e)
                # Bare raise keeps the original traceback
                raise

        # Process properties
        alive = True

        if __debug__:
            logger.debug(HEADER + "[%s] Starting process" %
                         str(process_name))

        # MAIN EXECUTOR LOOP
        while alive:
            # Runtime -> pipe - Read command from pipe
            command = pipe.read_command(retry_period=0.5)
            if command != "":
                if __debug__:
                    logger.debug(HEADER + "Received %s" % command)
                # Process the command
                alive = process_message(command,
                                        process_name,
                                        pipe,
                                        queue,
                                        tracing,
                                        logger,
                                        logger_handlers,
                                        logger_level,
                                        logger_formatter,
                                        storage_conf,
                                        storage_loggers,
                                        storage_loggers_handlers)

        # Stop storage
        if storage_conf != 'null':
            try:
                from storage.api import finishWorkerPostFork  # noqa
                with event(FINISH_WORKER_POSTFORK_EVENT):
                    finishWorkerPostFork()
            except ImportError:
                # The post fork call is optional in the storage API
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find finishWorkerPostFork storage call. Ignoring it." %  # noqa: E501
                        str(process_name))

        # Stop streaming
        if streaming:
            logger.debug(HEADER + "Stopping streaming for process " +
                         str(process_name))
            DistroStreamClientHandler.set_stop()

        sys.stdout.flush()
        sys.stderr.flush()
        if __debug__:
            logger.debug(HEADER + "[%s] Exiting process " %
                         str(process_name))
        pipe.write(QUIT_TAG)
        pipe.close()
    except BaseException as e:
        logger.error(e)
        # Bare raise keeps the original traceback
        raise
def compss_persistent_worker(config):
    """ Persistent worker main function.

    Retrieves the initial configuration, starts the storage (if any),
    spawns one executor process per configured pipe and then serves the
    commands received through the control pipe (add, query, cancel and
    remove executors, ping, quit). After the quit command it waits for the
    executor processes, logs any message left in their exception queues
    and releases the queues and the storage.

    :param config: Piper Worker Configuration description
    :return: None
    """
    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)

    # Set the binding in worker mode
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != 'null')

    logger, storage_loggers = load_loggers(config.debug,
                                           persistent_storage,
                                           config.tracing)

    if __debug__:
        logger.debug(HEADER + "piper_worker.py wake up")
        config.print_on_logger(logger)

    if persistent_storage:
        # Initialize storage
        logger.debug(HEADER + "Starting persistent storage")
        with event(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker
            initStorageAtWorker(config_file_path=config.storage_conf)

    # Create new processes (one exception queue per process)
    queues = []
    for i in range(0, config.tasks_x_node):
        if __debug__:
            logger.debug(HEADER + "Launching process " + str(i))
        process_name = 'Process-' + str(i)
        queue = Queue()
        queues.append(queue)
        conf = ExecutorConf(TRACING,
                            config.storage_conf,
                            logger,
                            storage_loggers,
                            config.stream_backend,
                            config.stream_master_name,
                            config.stream_master_port)
        process = Process(target=executor, args=(queue,
                                                 process_name,
                                                 config.pipes[i],
                                                 conf))
        # Processes are indexed by the input pipe they read from
        PROCESSES[config.pipes[i].input_pipe] = process
        process.start()

    # Read command from control pipe
    alive = True
    process_counter = config.tasks_x_node
    control_pipe = config.control_pipe
    while alive:
        command = control_pipe.read_command(retry_period=1)
        if command != "":
            line = command.split()

            if line[0] == ADD_EXECUTOR_TAG:
                process_name = 'Process-' + str(process_counter)
                process_counter = process_counter + 1
                in_pipe = line[1]
                out_pipe = line[2]
                pipe = Pipe(in_pipe, out_pipe)
                pid = create_threads(process_name, pipe)
                control_pipe.write(ADDED_EXECUTOR_TAG + " " +
                                   out_pipe + " " +
                                   in_pipe + " " +
                                   str(pid))

            elif line[0] == QUERY_EXECUTOR_ID_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.get(in_pipe)
                pid = proc.pid
                control_pipe.write(REPLY_EXECUTOR_ID_TAG + " " +
                                   out_pipe + " " +
                                   in_pipe + " " +
                                   str(pid))

            elif line[0] == CANCEL_TASK_TAG:
                in_pipe = line[1]
                proc = PROCESSES.get(in_pipe)
                pid = proc.pid
                logger.debug("[PYTHON WORKER] Signaling process with PID " +
                             str(pid) + " to cancel a task")
                kill(pid, signal.SIGUSR2)

            elif line[0] == REMOVE_EXECUTOR_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.pop(in_pipe, None)
                if proc:
                    if proc.is_alive():
                        logger.warning(HEADER + "Forcing terminate on : " +
                                       proc.name)
                        proc.terminate()
                    proc.join()
                control_pipe.write(REMOVED_EXECUTOR_TAG + " " +
                                   out_pipe + " " +
                                   in_pipe)

            elif line[0] == PING_TAG:
                control_pipe.write(PONG_TAG)

            elif line[0] == QUIT_TAG:
                alive = False

    # Wait for all processes
    for proc in PROCESSES.values():
        proc.join()

    # Check if there is any exception message from the executors.
    # NOTE: empty must be *called*; the bare bound method is always truthy,
    # which silently disabled this check.
    for executor_queue in queues:
        if not executor_queue.empty():
            logger.error(HEADER + "Exception in threads queue: " +
                         str(executor_queue.get()))

    for queue in queues:
        queue.close()
        queue.join_thread()

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker
            finishStorageAtWorker()

    if __debug__:
        logger.debug(HEADER + "Finished")

    control_pipe.write(QUIT_TAG)
    control_pipe.close()
def task_decorator(*args, **kwargs):
    # Entry point executed every time the decorated function is invoked.
    # The action depends on where the call happens:
    #   - master: the call goes through a TaskMaster.
    #   - worker with 'compss_key' among the keyword arguments: the task
    #     itself is being executed, so it goes through a TaskWorker.
    #   - worker without 'compss_key': it is a call from within another
    #     task. Goes through a TaskMaster if nesting is enabled, otherwise
    #     it is executed sequentially after warning through stderr.
    #   - anywhere else: plain sequential call.

    def _call_through_master():
        # Each task will have a TaskMaster, so its content will not be
        # shared.
        with event(TASK_INSTANTIATION, master=True):
            task_master = TaskMaster(self.decorator_arguments,
                                     self.user_function,
                                     self.core_element,
                                     self.registered,
                                     self.signature,
                                     self.interactive,
                                     self.module,
                                     self.function_arguments,
                                     self.function_name,
                                     self.module_name,
                                     self.function_type,
                                     self.class_name,
                                     self.hints)
        outcome = task_master.call(*args, **kwargs)
        # The call returns the future object followed by the (possibly
        # updated) values of the decorator attributes.
        (future_object,
         self.core_element,
         self.registered,
         self.signature,
         self.interactive,
         self.module,
         self.function_arguments,
         self.function_name,
         self.module_name,
         self.function_type,
         self.class_name,
         self.hints) = outcome
        del task_master
        return future_object

    if context.in_master():
        # @task being executed in the master
        self.__check_core_element__(kwargs, user_function)
        return _call_through_master()

    if context.in_worker():
        if 'compss_key' in kwargs:
            # @task being executed in the worker
            with event(WORKER_TASK_INSTANTIATION,
                       master=False, inside=True):
                task_worker = TaskWorker(self.decorator_arguments,
                                         self.user_function)
            outcome = task_worker.call(*args, **kwargs)
            del task_worker
            return outcome

        if context.is_nesting_enabled():
            # nested @task executed in the worker
            return _call_through_master()

        # Called from another task within the worker
        # Ignore the @task decorator and run it sequentially
        message = ("WARNING: Calling task: " +
                   str(user_function.__name__) +
                   " from this task.\n" +
                   " It will be executed sequentially " +
                   "within the caller task.")
        print(message, file=sys.stderr)
        return self._sequential_call(*args, **kwargs)

    # We are neither in master nor in the worker, or the user has
    # stopped the interactive session.
    # Therefore, the user code is being executed with no
    # launch_compss/enqueue_compss/runcompss/interactive session
    return self._sequential_call(*args, **kwargs)
def launch_pycompss_application(app,
                                func,
                                log_level="off",                  # type: str
                                o_c=False,                        # type: bool
                                debug=False,                      # type: bool
                                graph=False,                      # type: bool
                                trace=False,                      # type: bool
                                monitor=None,                     # type: int
                                project_xml=None,                 # type: str
                                resources_xml=None,               # type: str
                                summary=False,                    # type: bool
                                task_execution="compss",          # type: str
                                storage_impl=None,                # type: str
                                storage_conf=None,                # type: str
                                streaming_backend=None,           # type: str
                                streaming_master_name=None,       # type: str
                                streaming_master_port=None,       # type: str
                                task_count=50,                    # type: int
                                app_name=None,                    # type: str
                                uuid=None,                        # type: str
                                base_log_dir=None,                # type: str
                                specific_log_dir=None,            # type: str
                                extrae_cfg=None,                  # type: str
                                comm="NIO",                       # type: str
                                conn=DEFAULT_CONN,                # type: str
                                master_name="",                   # type: str
                                master_port="",                   # type: str
                                scheduler=DEFAULT_SCHED,          # type: str
                                jvm_workers=DEFAULT_JVM_WORKERS,  # type: str
                                cpu_affinity="automatic",         # type: str
                                gpu_affinity="automatic",         # type: str
                                fpga_affinity="automatic",        # type: str
                                fpga_reprogram="",                # type: str
                                profile_input="",                 # type: str
                                profile_output="",                # type: str
                                scheduler_config="",              # type: str
                                external_adaptation=False,        # type: bool
                                propagate_virtual_environment=True,  # noqa type: bool
                                mpi_worker=False,                 # type: bool
                                worker_cache=False,               # type: bool or str
                                *args, **kwargs
                                ):  # NOSONAR
    # type: (...) -> None
    """ Launch PyCOMPSs application from function.

    Starts the runtime, executes the application (either the whole file as
    a script or a single function of it) and stops the runtime.

    :param app: Application path
    :param func: Function (None or "__main__" to execute the file as script)
    :param log_level: Logging level [ "trace"|"debug"|"info"|"api"|"off" ]
                      (default: "off")
    :param o_c: Objects to string conversion [ True | False ] (default: False)
    :param debug: Debug mode [ True | False ] (default: False)
                  (overrides log_level)
    :param graph: Generate graph [ True | False ] (default: False)
    :param trace: Generate trace
                  [ True | False | "scorep" | "arm-map" | "arm-ddt"]
                  (default: False)
    :param monitor: Monitor refresh rate (default: None)
    :param project_xml: Project xml file path
    :param resources_xml: Resources xml file path
    :param summary: Execution summary [ True | False ] (default: False)
    :param task_execution: Task execution (default: "compss")
    :param storage_impl: Storage implementation path
    :param storage_conf: Storage configuration file path
    :param streaming_backend: Streaming backend (default: None)
    :param streaming_master_name: Streaming master name (default: None)
    :param streaming_master_port: Streaming master port (default: None)
    :param task_count: Task count (default: 50)
    :param app_name: Application name (default: Interactive_date)
    :param uuid: UUId
    :param base_log_dir: Base logging directory
    :param specific_log_dir: Specific logging directory
    :param extrae_cfg: Extrae configuration file path
    :param comm: Communication library (default: NIO)
    :param conn: Connector (default: DefaultSSHConnector)
    :param master_name: Master Name (default: "")
    :param master_port: Master port (default: "")
    :param scheduler: Scheduler (default:
                  es.bsc.compss.scheduler.loadbalancing.LoadBalancingScheduler)
    :param jvm_workers: Java VM parameters
                        (default: "-Xms1024m,-Xmx1024m,-Xmn400m")
    :param cpu_affinity: CPU Core affinity (default: "automatic")
    :param gpu_affinity: GPU Core affinity (default: "automatic")
    :param fpga_affinity: FPGA Core affinity (default: "automatic")
    :param fpga_reprogram: FPGA reprogram command (default: "")
    :param profile_input: Input profile  (default: "")
    :param profile_output: Output profile  (default: "")
    :param scheduler_config: Scheduler configuration  (default: "")
    :param external_adaptation: External adaptation [ True | False ]
                                (default: False)
    :param propagate_virtual_environment: Propagate virtual environment
                                          [ True | False ] (default: True)
    :param mpi_worker: Use the MPI worker [ True | False ] (default: False)
    :param worker_cache: Use the worker cache [ True | int(size) | False]
                         (default: False)
    :param args: Positional arguments
    :param kwargs: Named arguments
    :return: Execution result (None when the application is executed as a
             script or when the provided flags are not valid)
    :raise PyCOMPSsException: If COMPSS_HOME is not defined.
    """
    # Check that COMPSs is available
    if "COMPSS_HOME" not in os.environ:
        # Do not allow to continue if COMPSS_HOME is not defined
        raise PyCOMPSsException("ERROR: COMPSS_HOME is not defined in the environment")  # noqa: E501

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start, compss_stop

    ##############################################################
    # INITIALIZATION
    ##############################################################

    if debug:
        log_level = "debug"

    # Initial dictionary with the user defined parameters
    all_vars = parameters_to_dict(log_level,
                                  debug,
                                  o_c,
                                  graph,
                                  trace,
                                  monitor,
                                  project_xml,
                                  resources_xml,
                                  summary,
                                  task_execution,
                                  storage_impl,
                                  storage_conf,
                                  streaming_backend,
                                  streaming_master_name,
                                  streaming_master_port,
                                  task_count,
                                  app_name,
                                  uuid,
                                  base_log_dir,
                                  specific_log_dir,
                                  extrae_cfg,
                                  comm,
                                  conn,
                                  master_name,
                                  master_port,
                                  scheduler,
                                  jvm_workers,
                                  cpu_affinity,
                                  gpu_affinity,
                                  fpga_affinity,
                                  fpga_reprogram,
                                  profile_input,
                                  profile_output,
                                  scheduler_config,
                                  external_adaptation,
                                  propagate_virtual_environment,
                                  mpi_worker,
                                  worker_cache)
    # Save all vars in global current flags so that events.py can restart
    # the notebook with the same flags
    export_current_flags(all_vars)

    # Check the provided flags
    flags, issues = check_flags(all_vars)
    if not flags:
        print_flag_issues(issues)
        return None

    # Prepare the environment
    env_vars = prepare_environment(False, o_c, storage_impl, app,
                                   debug, trace, mpi_worker)
    all_vars.update(env_vars)

    monitoring_vars = prepare_loglevel_graph_for_monitoring(monitor,
                                                            graph,
                                                            debug,
                                                            log_level)
    all_vars.update(monitoring_vars)

    if RUNNING_IN_SUPERCOMPUTER:
        updated_vars = updated_variables_in_sc()
        all_vars.update(updated_vars)

    to_update = prepare_tracing_environment(all_vars["trace"],
                                            all_vars["extrae_lib"],
                                            all_vars["ld_library_path"])
    all_vars["trace"], all_vars["ld_library_path"] = to_update

    inf_vars = check_infrastructure_variables(all_vars["project_xml"],
                                              all_vars["resources_xml"],
                                              all_vars["compss_home"],
                                              all_vars["app_name"],
                                              all_vars["file_name"],
                                              all_vars["external_adaptation"])
    all_vars.update(inf_vars)

    create_init_config_file(**all_vars)

    ##############################################################
    # RUNTIME START
    ##############################################################

    # Runtime start
    compss_start(log_level, all_vars["trace"], True)

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(all_vars["compss_home"],
                            "Bindings",
                            "python",
                            str(all_vars["major_version"]),
                            "log")
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    logger = logging.getLogger("pycompss.runtime.launch")

    logger.debug("--- START ---")
    logger.debug("PyCOMPSs Log path: %s", log_path)

    if storage_impl and storage_conf:
        logger.debug("Starting storage")
        persistent_storage = master_init_storage(all_vars["storage_conf"],
                                                 logger)
    else:
        persistent_storage = False

    logger.debug("Starting streaming")
    streaming = init_streaming(all_vars["streaming_backend"],
                               all_vars["streaming_master_name"],
                               all_vars["streaming_master_port"])

    # The application sees the positional arguments as its sys.argv
    saved_argv = sys.argv
    sys.argv = args
    try:
        # Execution:
        with event(APPLICATION_RUNNING_EVENT, master=True):
            if func is None or func == "__main__":
                if IS_PYTHON3:
                    # Read through a context manager so that the file
                    # descriptor is released before running the code.
                    with open(app) as app_file:
                        app_source = app_file.read()
                    exec(app_source)
                else:
                    execfile(app)  # noqa
                result = None
            else:
                if IS_PYTHON3:
                    from importlib.machinery import SourceFileLoader  # noqa
                    imported_module = SourceFileLoader(all_vars["file_name"], app).load_module()  # noqa
                else:
                    import imp  # noqa
                    imported_module = imp.load_source(all_vars["file_name"], app)  # noqa
                method_to_call = getattr(imported_module, func)
                try:
                    result = method_to_call(*args, **kwargs)
                except TypeError:
                    # Retry without arguments (best effort for functions
                    # that do not accept the given parameters).
                    result = method_to_call()
    finally:
        # Recover the system arguments (also if the application fails, so
        # that the global state of the caller is never left modified)
        sys.argv = saved_argv

    # Stop streaming
    if streaming:
        stop_streaming()

    # Stop persistent storage
    if persistent_storage:
        master_stop_storage(logger)

    logger.debug("--- END ---")

    ##############################################################
    # RUNTIME STOP
    ##############################################################

    # Stop runtime
    compss_stop()
    clean_log_configs()

    return result
def compss_main():
    # type: () -> None
    """ PyCOMPSs main function.

    General call:
    python $PYCOMPSS_HOME/pycompss/runtime/launch.py $wall_clock $log_level
           $PyObject_serialize $storage_conf $streaming_backend
           $streaming_master_name $streaming_master_port
           $fullAppPath $application_args

    Starts the runtime, executes the user application and always stops the
    runtime afterwards with the resulting exit code.

    :return: None
    """
    global APP_PATH
    global STREAMING
    global PERSISTENT_STORAGE
    global LOGGER

    # Let the Python binding know we are at master
    context.set_pycompss_context(context.MASTER)
    # Then we can import the appropriate start and stop functions from the API
    from pycompss.api.api import compss_start           # noqa
    from pycompss.api.api import compss_stop            # noqa
    from pycompss.api.api import compss_set_wall_clock  # noqa

    # See parse_arguments, defined above
    # In order to avoid parsing user arguments, we are going to remove user
    # args from sys.argv
    user_sys_argv = sys.argv[10:]
    sys.argv = sys.argv[:10]
    args = parse_arguments()
    # We are done, now sys.argv must contain user args only
    sys.argv = [args.app_path] + user_sys_argv

    # Get log_level
    log_level = args.log_level

    # Setup tracing
    tracing = int(args.tracing)

    # Load user imports before starting the runtime (can be avoided if
    # ENVIRONMENT_VARIABLE_LOAD is set to false).
    # Reason: some cases like autoparallel can require to avoid loading.
    if os.environ.get(ENVIRONMENT_VARIABLE_LOAD) != "false":
        with context.loading_context():
            __load_user_module__(args.app_path, log_level)

    # Start the runtime
    compss_start(log_level, tracing, False)

    # Register @implements core elements (they can not be registered in
    # __load_user__module__).
    __register_implementation_core_elements__()

    # Get application wall clock limit
    wall_clock = int(args.wall_clock)
    if wall_clock > 0:
        compss_set_wall_clock(wall_clock)

    # Get object_conversion boolean
    set_object_conversion(args.object_conversion == "true")

    # Get storage configuration at master
    storage_conf = args.storage_configuration

    # Get application execution path
    APP_PATH = args.app_path

    # Setup logging
    binding_log_path = get_log_path()
    log_path = os.path.join(os.getenv("COMPSS_HOME"),
                            "Bindings",
                            "python",
                            str(_PYTHON_VERSION),
                            "log")
    set_temporary_directory(binding_log_path)
    logging_cfg_file = get_logging_cfg_file(log_level)
    init_logging(os.path.join(log_path, logging_cfg_file), binding_log_path)
    LOGGER = logging.getLogger("pycompss.runtime.launch")

    # Get JVM options
    # jvm_opts = os.environ["JVM_OPTIONS_FILE"]
    # from pycompss.util.jvm.parser import convert_to_dict
    # opts = convert_to_dict(jvm_opts)
    # storage_conf = opts.get("-Dcompss.storage.conf")

    exit_code = 0
    try:
        if __debug__:
            LOGGER.debug('--- START ---')
            LOGGER.debug('PyCOMPSs Log path: %s', binding_log_path)

        # Start persistent storage
        PERSISTENT_STORAGE = master_init_storage(storage_conf, LOGGER)

        # Start STREAMING
        STREAMING = init_streaming(args.streaming_backend,
                                   args.streaming_master_name,
                                   args.streaming_master_port)

        # Show module warnings
        if __debug__:
            show_optional_module_warnings()

        # MAIN EXECUTION
        with event(APPLICATION_RUNNING_EVENT, master=True):
            if IS_PYTHON3:
                with open(APP_PATH) as f:
                    exec(compile(f.read(), APP_PATH, "exec"), globals())
            else:
                execfile(APP_PATH, globals())  # noqa

        # End
        if __debug__:
            LOGGER.debug('--- END ---')
    except SystemExit as e:  # NOSONAR - reraising would not allow to stop the runtime gracefully.
        # sys.exit() without argument carries the code None, which means
        # successful termination exactly like 0.
        if e.code not in (0, None):
            print("[ ERROR ]: User program ended with exitcode %s." % e.code)
            print("\t\tShutting down runtime...")
            exit_code = e.code
    except SerializerException:
        # If an object that can not be serialized has been used as a parameter.
        print("[ ERROR ]: Serialization exception")
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        for line in lines:
            # Show only the traceback entries that belong to the user code
            if APP_PATH in line:
                print("[ ERROR ]: In: %s" % line)
        exit_code = 1
    except COMPSsException as e:
        # Any other exception occurred
        print("[ ERROR ]: A COMPSs exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 0  # COMPSs exception is not considered an error
    except Exception as e:
        # Any other exception occurred
        print("[ ERROR ]: An exception occurred: " + str(e))
        traceback.print_exc()
        exit_code = 1
    finally:
        # Stop runtime
        stop_all(exit_code)
        clean_log_configs()
def main():
    # type: () -> None
    """ GAT worker main code.

    Executes the task provided by parameters.
    Terminates the process with exit code 1 if the task execution returned
    1 as exit code.

    :return: None
    """
    # Emit sync event if tracing is enabled
    tracing = sys.argv[1] == 'true'
    task_id = int(sys.argv[2])
    log_level = sys.argv[3]
    storage_conf = sys.argv[4]
    stream_backend = sys.argv[5]
    stream_master_name = sys.argv[6]
    stream_master_port = sys.argv[7]
    # Next: method_type = sys.argv[8]
    params = sys.argv[9:]
    # Next parameters:
    # class_name = sys.argv[10]
    # method_name = sys.argv[11]
    # num_slaves = sys.argv[12]
    # i = 13 + num_slaves
    # slaves = sys.argv[12..i]
    # numCus = sys.argv[i+1]
    # has_target = sys.argv[i+2] == 'true'
    # num_params = int(sys.argv[i+3])
    # params = sys.argv[i+4..]

    debug_enabled = log_level in ("true", "debug")
    if debug_enabled:
        print("Tracing = " + str(tracing))
        print("Task id = " + str(task_id))
        print("Log level = " + str(log_level))
        print("Storage conf = " + str(storage_conf))

    # Storage and streaming are disabled through placeholder values
    persistent_storage = storage_conf != "null"
    streaming = stream_backend not in (None, "null", "NONE")

    with trace_multiprocessing_worker() if tracing else dummy_context():

        if streaming:
            # Start streaming
            DistroStreamClientHandler.init_and_start(
                master_ip=stream_master_name,
                master_port=stream_master_port)

        # Load log level configuration file
        worker_path = os.path.dirname(os.path.realpath(__file__))
        if debug_enabled:
            # Debug
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker_debug.json"))
        elif log_level in ("info", "off"):
            # Info or no debug
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker_off.json"))
        else:
            # Default
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker.json"))
        init_logging_worker(log_json, tracing)

        if persistent_storage:
            # Initialize storage
            with event(INIT_STORAGE_AT_WORKER_EVENT):
                from storage.api import initWorker as initStorageAtWorker  # noqa
                initStorageAtWorker(config_file_path=storage_conf)

        # Init worker
        exit_code = compss_worker(tracing,
                                  str(task_id),
                                  storage_conf,
                                  params,
                                  log_json)

        if streaming:
            # Finish streaming
            DistroStreamClientHandler.set_stop()

        if persistent_storage:
            # Finish storage
            with event(FINISH_STORAGE_AT_WORKER_EVENT):
                from storage.api import finishWorker as finishStorageAtWorker  # noqa
                finishStorageAtWorker()

    if exit_code == 1:
        # sys.exit instead of the exit() helper: the latter is injected by
        # the site module for interactive use and may not be available.
        sys.exit(1)