def testObjectArray(self):
    """ Tests de-/serialization of object np.arrays """
    arr = np.array(np.random.random((50, 50)), dtype=object)
    tmp_file = tempfile.NamedTemporaryFile()
    serialize_to_file(arr, tmp_file.name)
    deserialize_from_file(tmp_file.name)

def getByID(id):
    """
    This function retrieves an object from an external storage technology
    given its identifier. This dummy implementation just deserializes the
    object stored in the <id>.PSCO file within the storage path.
    @param id: key of the object to be retrieved.
    @return: the real object.
    """
    # print "-----------------------------------------------------"
    # print "| WARNING!!! - YOU ARE USING THE DUMMY STORAGE API. |"
    # print "| Call to: getByID                                  |"
    # print "| ********************************************* |"
    # print "| *** Check that you really want to use the *** |"
    # print "| ************* dummy storage api ************* |"
    # print "| ********************************************* |"
    # print "-----------------------------------------------------"
    if id is not None:
        try:
            file_name = id + '.PSCO'
            file_path = storage_path + file_name
            obj = deserialize_from_file(file_path)
            obj.setID(id)
            return obj
        except ValueError:
            # The id does not comply with the uuid4 format --> raise
            print("Error: the ID for getByID does not comply with the "
                  "uuid4 format.")
            raise ValueError('Using the dummy storage API getByID with '
                             'wrong id.')
    else:
        # Using a None id --> raise an exception
        print("Error: the ID for getByID is None.")
        raise ValueError('Using the dummy storage API getByID with None id.')

def test_serialize_deserialize_obj_to_file():
    # Uses serialize to handler underneath.
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    target_file = "target.pkl"
    obj = [1, 3, 2, "hello", "world"]
    serialize_to_file(obj, target_file)
    result = deserialize_from_file(target_file)
    os.remove(target_file)
    assert obj == result, \
        "ERROR: Object serialization and deserialization retrieved wrong object."  # noqa: E501

def test_serialize_deserialize_obj_to_file_no_gc():
    # Uses serialize to handler underneath.
    import pycompss.util.serialization.serializer as serializer
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    serializer.DISABLE_GC = True
    target_file = "target.pkl"
    obj = [1, 3, 2, "hello", "world"]
    serialize_to_file(obj, target_file)
    result = deserialize_from_file(target_file)
    os.remove(target_file)
    # Restore the global flag so later tests run with the GC enabled
    serializer.DISABLE_GC = False
    assert obj == result, \
        "ERROR: Object serialization and deserialization (without garbage collector) retrieved wrong object."  # noqa: E501

def test_serialize_deserialize_np_to_file():
    # Uses serialize to handler underneath.
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    target_file_np = "target_np.pkl"
    obj_np = np.random.rand(4, 4)
    serialize_to_file(obj_np, target_file_np)
    result_np = deserialize_from_file(target_file_np)
    os.remove(target_file_np)
    assert np.array_equal(obj_np, result_np), \
        "ERROR: Numpy object serialization and deserialization retrieved wrong object."  # noqa: E501

def test_serialize_deserialize_np_to_file_no_gc():
    # Uses serialize to handler underneath.
    import pycompss.util.serialization.serializer as serializer
    from pycompss.util.serialization.serializer import serialize_to_file
    from pycompss.util.serialization.serializer import deserialize_from_file
    serializer.DISABLE_GC = True
    target_file_np = "target_np.pkl"
    obj_np = np.random.rand(4, 4)
    serialize_to_file(obj_np, target_file_np)
    result_np = deserialize_from_file(target_file_np)
    os.remove(target_file_np)
    # Restore the global flag so later tests run with the GC enabled
    serializer.DISABLE_GC = False
    assert np.array_equal(obj_np, result_np), \
        "ERROR: Numpy object serialization and deserialization (without garbage collector) retrieved wrong object."  # noqa: E501

def test_serialize_objects():
    from pycompss.util.serialization.serializer import serialize_objects
    from pycompss.util.serialization.serializer import deserialize_from_file
    obj1 = ([1, 2, 3, 4], "obj1.pkl")
    obj2 = ({"hello": "mars", "goodbye": "world"}, "obj2.pkl")
    obj3 = (np.random.rand(3, 3), "obj3.pkl")
    objects = [obj1, obj2, obj3]
    serialize_objects(objects)
    result = []
    for obj in objects:
        result.append(deserialize_from_file(obj[1]))
    os.remove(obj1[1])
    os.remove(obj2[1])
    os.remove(obj3[1])
    assert len(result) == len(objects), \
        "ERROR: Wrong number of objects retrieved."
    assert result[0] == obj1[0], "ERROR: Wrong first object."
    assert result[1] == obj2[0], "ERROR: Wrong second object."
    assert np.array_equal(result[2], obj3[0]), "ERROR: Wrong third object."

def get_by_id(id):  # noqa
    # type: (str) -> typing.Any
    """
    This function retrieves an object from an external storage technology
    given its identifier. This dummy implementation just deserializes the
    object stored in the <id>.PSCO file within the storage path.

    :param id: key of the object to be retrieved.
    :return: the real object.
    """
    # Warning message:
    # print "-----------------------------------------------------"
    # print "| WARNING!!! - YOU ARE USING THE DUMMY STORAGE API. |"
    # print "| Call to: get_by_id                                |"
    # print "| ********************************************* |"
    # print "| *** Check that you really want to use the *** |"
    # print "| ************* dummy storage api ************* |"
    # print "| ********************************************* |"
    # print "-----------------------------------------------------"
    if id is not None:
        try:
            file_name = id + '.PSCO'
            file_path = STORAGE_PATH + file_name
            obj = deserialize_from_file(file_path)
            obj.setID(id)  # noqa
            return obj
        except ValueError:
            # The id does not comply with the uuid4 format --> raise
            print("Error: the ID for get_by_id does not comply with the "
                  "uuid4 format.")
            raise ValueError('Using the dummy storage API get_by_id '
                             'with wrong id.')
    else:
        # Using a None id --> raise an exception
        print("Error: the ID for get_by_id is None.")
        raise ValueError('Using the dummy storage API get_by_id with None id.')

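# Illustrative round trip through the dummy storage API above. This is a
# hedged sketch, not part of the API: it assumes STORAGE_PATH points to a
# writable directory and it fakes the "persist" side by serializing the
# object to "<id>.PSCO" manually. Counter is a hypothetical class; only
# get_by_id, serialize_to_file and STORAGE_PATH come from the code above.
def _example_dummy_storage_round_trip():
    import uuid

    class Counter(object):
        def __init__(self, value):
            self.value = value
            self.id = None

        def setID(self, id):  # get_by_id calls setID on the restored object
            self.id = id

    obj_id = str(uuid.uuid4())
    serialize_to_file(Counter(42), STORAGE_PATH + obj_id + '.PSCO')
    restored = get_by_id(obj_id)
    assert restored.value == 42 and restored.id == obj_id
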
def evaluate_worker(worker, name, pipes, files, current_path,
                    executor_out, executor_in, worker_out, worker_in):
    (temp_folder, executor_outbound, executor_inbound,
     control_worker_outbound, control_worker_inbound) = files
    print("Starting " + name + " worker")
    worker.start()
    print("Temp folder: " + temp_folder)
    # Wait 2 seconds for the worker to start.
    print("Waiting 2 seconds to send a task request")
    time.sleep(2)
    # Run a simple task
    job1_out = tempfile.NamedTemporaryFile(delete=False).name
    job1_err = tempfile.NamedTemporaryFile(delete=False).name
    simple_task_message = [
        "EXECUTE_TASK", "1",
        job1_out, job1_err,
        "0", "1", "true", "null", "METHOD", "common_piper_tester",
        "simple", "0", "1", "localhost", "1", "false", "None", "0", "0",
        "-", "0", "0",
    ]
    simple_task_message_str = " ".join(simple_task_message)
    print("Requesting: " + simple_task_message_str)
    if IS_PYTHON3:
        os.write(executor_out, (simple_task_message_str + "\n").encode())  # noqa
    else:
        os.write(executor_out, simple_task_message_str + "\n")  # noqa
    time.sleep(2)
    # Run an increment task
    job2_out = tempfile.NamedTemporaryFile(delete=False).name
    job2_err = tempfile.NamedTemporaryFile(delete=False).name
    job2_result = tempfile.NamedTemporaryFile(delete=False).name
    increment_task_message = [
        "EXECUTE_TASK", "2",
        job2_out, job2_err,
        "0", "1", "true", "null", "METHOD", "common_piper_tester",
        "increment", "0", "1", "localhost", "1", "false", "9", "1", "2",
        "4", "3", "null", "value", "null", "1", "9", "3", "#", "$return_0",
        "null",
        job2_result + ":d1v2_1599560599402.IT:false:true:" + job2_result,
        "-", "0", "0",
    ]
    increment_task_message_str = " ".join(increment_task_message)
    print("Requesting: " + increment_task_message_str)
    if IS_PYTHON3:
        os.write(executor_out, (increment_task_message_str + "\n").encode())  # noqa
    else:
        os.write(executor_out, increment_task_message_str + "\n")  # noqa
    time.sleep(2)
    # Send quit message
    os.write(executor_out, b"QUIT\n")
    os.write(worker_out, b"QUIT\n")
    # Wait for the worker to finish
    worker.join()
    # Cleanup: close pipes
    os.close(executor_out)
    os.close(executor_in)
    os.close(worker_out)
    os.close(worker_in)
    # Remove pipes
    for pipe in pipes:
        os.unlink(pipe)
        if os.path.isfile(pipe):
            os.remove(pipe)
    # Check logs
    out_log = os.path.join(temp_folder, "binding_worker.out")
    err_log = os.path.join(temp_folder, "binding_worker.err")
    if os.path.exists(err_log):
        raise PyCOMPSsException(ERROR_MESSAGE + err_log)
    with open(out_log, "r") as f:
        # Read once: a second f.read() would return "" (file pointer at EOF)
        content = f.read()
        if "ERROR" in content:
            raise PyCOMPSsException(ERROR_MESSAGE + out_log)
        if "Traceback" in content:
            raise PyCOMPSsException(ERROR_MESSAGE + out_log)
    # Check task 1
    check_task(job1_out, job1_err)
    # Check task 2
    check_task(job2_out, job2_err)
    result = deserialize_from_file(job2_result)
    if result != 2:
        raise PyCOMPSsException(
            "Wrong result obtained for increment task. "
            "Expected 2, received: " + str(result))
    # Remove logs
    os.remove(job1_out)
    os.remove(job1_err)
    os.remove(job2_out)
    os.remove(job2_err)
    os.remove(job2_result)
    if os.path.isfile(err_log):
        os.remove(err_log)
    if os.path.isfile(out_log):
        os.remove(out_log)
    if os.path.isfile(current_path + STD_OUT_FILE):
        os.remove(current_path + STD_OUT_FILE)
    if os.path.isfile(current_path + STD_ERR_FILE):
        os.remove(current_path + STD_ERR_FILE)
    shutil.rmtree(temp_folder)
    if os.path.isfile(executor_outbound):
        os.remove(executor_outbound)
    if os.path.isfile(executor_inbound):
        os.remove(executor_inbound)
    if os.path.isfile(control_worker_outbound):
        os.remove(control_worker_outbound)
    if os.path.isfile(control_worker_inbound):
        os.remove(control_worker_inbound)

def test_piper_worker():
    # Override sys.argv to mimic runtime call
    sys_argv_backup = list(sys.argv)
    sys_path_backup = list(sys.path)
    sys.argv = ['piper_worker.py',
                '/tmp/', 'false', 'true', '0', 'null', 'NONE', 'localhost',
                '49049', '1',
                '/tmp/pipe_-504901196_executor0.outbound',
                '/tmp/pipe_-504901196_executor0.inbound',
                '/tmp/pipe_-504901196_control_worker.outbound',
                '/tmp/pipe_-504901196_control_worker.inbound']
    pipes = sys.argv[-4:]
    # Create pipes
    for pipe in pipes:
        if os.path.exists(pipe):
            os.remove(pipe)
        os.mkfifo(pipe)
    # Open pipes
    executor_out = os.open(pipes[0], os.O_RDWR)
    executor_in = os.open(pipes[1], os.O_RDWR)
    worker_out = os.open(pipes[2], os.O_RDWR)
    worker_in = os.open(pipes[3], os.O_RDWR)
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(current_path)
    # Start the piper worker in a separate process
    worker = multiprocessing.Process(target=worker_thread,
                                     args=(sys.argv, current_path))
    print("Starting piper worker")
    worker.start()
    # Wait 2 seconds for the worker to start.
    print("Waiting 2 seconds to send a task request")
    time.sleep(2)
    # Run a simple task
    job1_out = '/tmp/job1_NEW.out'
    job1_err = '/tmp/job1_NEW.err'
    simple_task_message = ['EXECUTE_TASK', '1',
                           job1_out, job1_err,
                           '0', '1', 'true', 'null', 'METHOD', 'test_piper',
                           'simple', '0', '1', 'localhost', '1', 'false',
                           'None', '0', '0', '-', '0', '0']
    simple_task_message_str = " ".join(simple_task_message)
    print("Requesting: " + simple_task_message_str)
    # os.write expects bytes
    os.write(executor_out, (simple_task_message_str + '\n').encode())  # noqa
    time.sleep(2)
    # Run an increment task
    job2_out = '/tmp/job2_NEW.out'
    job2_err = '/tmp/job2_NEW.err'
    job2_result = '/tmp/job2.IT'
    increment_task_message = ['EXECUTE_TASK', '2',
                              job2_out, job2_err,
                              '0', '1', 'true', 'null', 'METHOD',
                              'test_piper', 'increment', '0', '1',
                              'localhost', '1', 'false', '9', '1', '2',
                              '4', '3', 'null', 'value', 'null', '1', '9',
                              '3', '#', '$return_0', 'null',
                              job2_result + ':d1v2_1599560599402.IT:false:true:' + job2_result,  # noqa: E501
                              '-', '0', '0']
    increment_task_message_str = " ".join(increment_task_message)
    print("Requesting: " + increment_task_message_str)
    os.write(executor_out, (increment_task_message_str + '\n').encode())  # noqa
    time.sleep(2)
    # Send quit message
    os.write(executor_out, b"QUIT\n")
    os.write(worker_out, b"QUIT\n")
    # Wait for the worker to finish
    worker.join()
    # Cleanup: close pipes
    os.close(executor_out)
    os.close(executor_in)
    os.close(worker_out)
    os.close(worker_in)
    # Remove pipes
    for pipe in pipes:
        os.unlink(pipe)
        if os.path.isfile(pipe):
            os.remove(pipe)
    # Check logs
    out_log = "log/binding_worker.out"
    err_log = "log/binding_worker.err"
    if os.path.exists(err_log):
        raise Exception("An error happened. Please check " + err_log)
    with open(out_log, 'r') as f:
        # Read once: a second f.read() would return "" (file pointer at EOF)
        content = f.read()
        if 'ERROR' in content:
            raise Exception("An error happened. Please check " + out_log)
        if 'Traceback' in content:
            raise Exception("An error happened. Please check " + out_log)
    # Check task 1
    check_task(job1_out, job1_err)
    # Check task 2
    check_task(job2_out, job2_err)
    result = deserialize_from_file(job2_result)
    if result != 2:
        raise Exception("Wrong result obtained for increment task. "
                        "Expected 2, received: " + str(result))
    # Remove logs
    os.remove(job1_out)
    os.remove(job1_err)
    os.remove(job2_out)
    os.remove(job2_err)
    os.remove(job2_result)
    if os.path.isfile(err_log):
        os.remove(err_log)
    if os.path.isfile(out_log):
        os.remove(out_log)
    if os.path.isfile(current_path + "/../../../../std.out"):
        os.remove(current_path + "/../../../../std.out")
    if os.path.isfile(current_path + "/../../../../std.err"):
        os.remove(current_path + "/../../../../std.err")
    # Restore sys.argv and sys.path
    sys.argv = sys_argv_backup
    sys.path = sys_path_backup

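# Hedged sketch: both tests above hand-build the EXECUTE_TASK pipe message
# as a flat, space-separated token list. The field labels below are
# inferred from the values the tests pass; they are assumptions, not the
# runtime's specification, and build_execute_task_message is hypothetical.
def build_execute_task_message(task_id, out_file, err_file, module, function):
    return " ".join([
        "EXECUTE_TASK", str(task_id),  # command + job id
        out_file, err_file,            # job stdout / stderr files
        "0", "1", "true", "null",      # flags as used by the tests above
        "METHOD", module, function,    # task implementation to execute
        "0", "1", "localhost", "1",    # slaves / hosts / computing units
        "false", "None", "0", "0",     # no target, no returns, no params
        "-", "0", "0",                 # trailing markers as in the tests
    ])


# Example: reproduces the simple task request sent in test_piper_worker:
# build_execute_task_message(1, '/tmp/job1_NEW.out', '/tmp/job1_NEW.err',
#                            'test_piper', 'simple')
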
def execute_task(process_name, storage_conf, params, tracing, logger,
                 python_mpi=False):
    """
    ExecuteTask main method.

    :param process_name: Process name
    :param storage_conf: Storage configuration file path
    :param params: List of parameters
    :param tracing: Tracing flag
    :param logger: Logger to use
    :param python_mpi: If it is an MPI task
    :return: exit code, new types and new values
    """
    if __debug__:
        logger.debug("Begin task execution in %s" % process_name)
    persistent_storage = False
    if storage_conf != 'null':
        persistent_storage = True

    # Retrieve the parameters from the params argument
    path = params[0]
    method_name = params[1]
    num_slaves = int(params[3])
    time_out = int(params[2])
    slaves = []
    for i in range(3, 3 + num_slaves):
        slaves.append(params[i])
    arg_position = 4 + num_slaves
    args = params[arg_position:]
    cus = args[0]
    args = args[1:]
    has_target = args[0]
    return_type = args[1]
    return_length = int(args[2])
    num_params = int(args[3])
    args = args[4:]

    # COMPSs keywords for tasks (ie: tracing, process name...)
    # compss_key is included to be checked in the @task decorator, so that
    # the task knows if it has been called from the worker or from the
    # user code (reason: ignore @task decorator if called from another task).
    compss_kwargs = {
        'compss_key': True,
        'compss_tracing': tracing,
        'compss_process_name': process_name,
        'compss_storage_conf': storage_conf,
        'compss_return_length': return_length,
        'python_MPI': python_mpi
    }

    if __debug__:
        logger.debug("Storage conf: %s" % str(storage_conf))
        logger.debug("Params: %s" % str(params))
        logger.debug("Path: %s" % str(path))
        logger.debug("Method name: %s" % str(method_name))
        logger.debug("Num slaves: %s" % str(num_slaves))
        logger.debug("Slaves: %s" % str(slaves))
        logger.debug("Cus: %s" % str(cus))
        logger.debug("Has target: %s" % str(has_target))
        logger.debug("Num Params: %s" % str(num_params))
        logger.debug("Return Length: %s" % str(return_length))
        logger.debug("Args: %r" % args)

    # Get all parameter values
    if __debug__:
        logger.debug("Processing parameters:")
    values = get_input_params(num_params, logger, args)
    types = [x.type for x in values]

    if __debug__:
        logger.debug("RUN TASK with arguments:")
        logger.debug("\t- Path: %s" % path)
        logger.debug("\t- Method/function name: %s" % method_name)
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- # parameters: %s" % str(num_params))
        logger.debug("\t- Values:")
        for v in values:
            logger.debug("\t\t %r" % v)
        logger.debug("\t- COMPSs types:")
        for t in types:
            logger.debug("\t\t %s" % str(t))

    import_error = False
    new_types = []
    new_values = []
    timed_out = False

    try:
        # Try to import the module (for functions)
        if __debug__:
            logger.debug("Trying to import the user module: %s" % path)
        py_version = sys.version_info
        if py_version >= (2, 7):
            import importlib
            module = importlib.import_module(path)  # Python 2.7
            if path.startswith('InteractiveMode_'):
                # Force reload in interactive mode. The user may have
                # overwritten a function or task.
                if py_version < (3, 0):
                    reload(module)  # noqa - Python 2 builtin
                elif py_version < (3, 4):
                    import imp
                    imp.reload(module)
                else:
                    importlib.reload(module)
            if __debug__:
                msg = "Module successfully loaded (Python version >= 2.7)"
                logger.debug(msg)
        else:
            module = __import__(path, globals(), locals(), [path], -1)
            if __debug__:
                msg = "Module successfully loaded (Python version < 2.7)"
                logger.debug(msg)
    except ImportError:
        if __debug__:
            msg = "Could not import the module. Reason: Method in class."
            logger.debug(msg)
        import_error = True

    if not import_error:
        # Module method declared as task
        result = task_execution(logger, process_name, module, method_name,
                                time_out, types, values, compss_kwargs,
                                persistent_storage, storage_conf)
        exit_code = result[0]
        new_types = result[1]
        new_values = result[2]
        target_direction = result[3]
        timed_out = result[4]
        except_msg = result[5]
        if exit_code != 0:
            return exit_code, new_types, new_values, timed_out, except_msg
    else:
        # Method declared as task in class
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]
        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        try:
            module = __import__(module_name, fromlist=[class_name])
            klass = getattr(module, class_name)
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            logger.exception("EXCEPTION IMPORTING MODULE IN %s" %
                             process_name)
            logger.exception(''.join(line for line in lines))
            return 1, [], [], False, None

        if __debug__:
            logger.debug("Method in class %s of module %s" % (class_name,
                                                              module_name))
            logger.debug("Has target: %s" % str(has_target))

        if has_target == 'true':
            # Instance method
            # The self object needs to be an object in order to call the
            # function. So, it can not be done in the @task decorator.
            # Since the args structure is parameters + self + returns we pop
            # the corresponding considering the return_length notified by the
            # runtime (-1 due to index starts from 0).
            self_index = num_params - return_length - 1
            self_elem = values.pop(self_index)
            self_type = types.pop(self_index)

            if self_type == parameter.TYPE.EXTERNAL_PSCO:
                if __debug__:
                    logger.debug("Last element (self) is a PSCO with id: %s" %
                                 str(self_elem.key))
                obj = get_by_id(self_elem.key)
            else:
                obj = None
                file_name = None
                if self_elem.key is None:
                    file_name = self_elem.file_name.split(':')[-1]
                    if __debug__:
                        logger.debug("Deserialize self from file.")
                    try:
                        obj = deserialize_from_file(file_name)
                    except Exception:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        logger.exception("EXCEPTION DESERIALIZING SELF IN %s" %
                                         process_name)
                        logger.exception(''.join(line for line in lines))
                        return 1, [], [], False, None
                    if __debug__:
                        logger.debug('Deserialized self object is: %s' %
                                     self_elem.content)
                        logger.debug("Processing callee, a hidden object of %s in file %s" %  # noqa: E501
                                     (type(self_elem.content), file_name))
            values.insert(0, obj)
            if not self_type == parameter.TYPE.EXTERNAL_PSCO:
                types.insert(0, parameter.TYPE.OBJECT)
            else:
                types.insert(0, parameter.TYPE.EXTERNAL_PSCO)

            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]
            if exit_code != 0:
                return exit_code, new_types, new_values, timed_out, except_msg

            # Depending on the target_direction option, it is necessary to
            # serialize again self or not. Since this option is only visible
            # within the task decorator, the task_execution returns the value
            # of target_direction in order to know here if self has to be
            # serialized. This solution avoids using inspect.
            if target_direction.direction == parameter.DIRECTION.INOUT or \
                    target_direction.direction == parameter.DIRECTION.COMMUTATIVE:  # noqa: E501
                if is_psco(obj):
                    # There is no explicit update if self is a PSCO.
                    # Consequently, the changes on the PSCO must have been
                    # pushed into the storage automatically on each PSCO
                    # modification.
                    if __debug__:
                        logger.debug("The changes on the PSCO must have been"
                                     " automatically updated by the storage.")
                else:
                    if __debug__:
                        logger.debug("Serializing self to file: %s" %
                                     file_name)
                    try:
                        serialize_to_file(obj, file_name)
                    except Exception:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        logger.exception("EXCEPTION SERIALIZING SELF IN %s" %
                                         process_name)
                        logger.exception(''.join(line for line in lines))
                        return 1, new_types, new_values, timed_out, except_msg
                    if __debug__:
                        logger.debug("Obj: %r" % obj)
        else:
            # Class method - class is not included in values (e.g. values=[7])
            types.append(None)  # class must be first type
            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]
            if exit_code != 0:
                return exit_code, new_types, new_values, timed_out, except_msg

    # EVERYTHING OK
    if __debug__:
        logger.debug("End task execution. Status: Ok")
    return exit_code, new_types, new_values, timed_out, except_msg

def execute_task(process_name,              # type: str
                 storage_conf,              # type: str
                 params,                    # type: list
                 tracing,                   # type: bool
                 logger,                    # type: typing.Any
                 logger_cfg,                # type: str
                 log_files,                 # type: tuple
                 python_mpi=False,          # type: bool
                 collections_layouts=None,  # type: dict
                 cache_queue=None,          # type: typing.Any
                 cache_ids=None,            # type: typing.Any
                 cache_profiler=False,      # type: bool
                 ):
    # type: (...) -> typing.Tuple[int, list, list, typing.Optional[bool], str]
    """
    ExecuteTask main method.

    :param process_name: Process name.
    :param storage_conf: Storage configuration file path.
    :param params: List of parameters.
    :param tracing: Tracing flag.
    :param logger: Logger to use.
    :param logger_cfg: Logger configuration file.
    :param log_files: Tuple with (out filename, err filename). None to avoid
                      stdout and stderr fd redirection.
    :param python_mpi: If it is an MPI task.
    :param collections_layouts: Collections layouts for python MPI tasks.
    :param cache_queue: Cache tracker communication queue.
    :param cache_ids: Cache proxy dictionary (read-only).
    :param cache_profiler: Cache profiler.
    :return: exit_code, new_types, new_values, timed_out and except_msg.
    """
    if __debug__:
        logger.debug("BEGIN TASK execution in %s" % process_name)
    persistent_storage = False
    if storage_conf != 'null':
        persistent_storage = True

    # Retrieve the parameters from the params argument
    path = params[0]
    method_name = params[1]
    num_slaves = int(params[3])
    time_out = int(params[2])
    slaves = []
    for i in range(3, 3 + num_slaves):
        slaves.append(params[i])
    arg_position = 4 + num_slaves
    args = params[arg_position:]
    cus = args[0]  # noqa
    args = args[1:]
    has_target = args[0]
    # Next parameter: return_type = args[1]
    return_length = int(args[2])
    num_params = int(args[3])
    args = args[4:]

    # COMPSs keywords for tasks (ie: tracing, process name...)
    # compss_key is included to be checked in the @task decorator, so that
    # the task knows if it has been called from the worker or from the
    # user code (reason: ignore @task decorator if called from another task
    # or decide if submit to runtime if nesting is enabled).
    compss_kwargs = {
        'compss_key': True,
        'compss_tracing': tracing,
        'compss_process_name': process_name,
        'compss_storage_conf': storage_conf,
        'compss_return_length': return_length,
        'compss_logger': logger,
        'compss_log_cfg': logger_cfg,
        'compss_log_files': log_files,
        'compss_python_MPI': python_mpi,
        'compss_collections_layouts': collections_layouts,
        'cache_queue': cache_queue,
        'cache_ids': cache_ids,
        'cache_profiler': cache_profiler,
    }

    if __debug__:
        logger.debug("COMPSs parameters:")
        logger.debug("\t- Storage conf: %s" % str(storage_conf))
        logger.debug("\t- Logger cfg: %s" % str(logger_cfg))
        if log_files:
            logger.debug("\t- Log out file: %s" % str(log_files[0]))
            logger.debug("\t- Log err file: %s" % str(log_files[1]))
        else:
            logger.debug("\t- Log out and err not redirected")
        logger.debug("\t- Params: %s" % str(params))
        logger.debug("\t- Path: %s" % str(path))
        logger.debug("\t- Method name: %s" % str(method_name))
        logger.debug("\t- Num slaves: %s" % str(num_slaves))
        logger.debug("\t- Slaves: %s" % str(slaves))
        logger.debug("\t- Cus: %s" % str(cus))
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- Num Params: %s" % str(num_params))
        logger.debug("\t- Return Length: %s" % str(return_length))
        logger.debug("\t- Args: %r" % args)
        logger.debug("\t- COMPSs kwargs:")
        for k, v in compss_kwargs.items():
            logger.debug("\t\t- %s: %s" % (str(k), str(v)))

    # Get all parameter values
    if __debug__:
        logger.debug("Processing parameters:")
        # logger.debug(args)
    values = get_task_params(num_params, logger, args)
    types = [x.content_type for x in values]

    if __debug__:
        logger.debug("RUN TASK with arguments:")
        logger.debug("\t- Path: %s" % path)
        logger.debug("\t- Method/function name: %s" % method_name)
        logger.debug("\t- Has target: %s" % str(has_target))
        logger.debug("\t- # parameters: %s" % str(num_params))
        # Next parameters are the values:
        # logger.debug("\t- Values:")
        # for v in values:
        #     logger.debug("\t\t %r" % v)
        # logger.debug("\t- COMPSs types:")
        # for t in types:
        #     logger.debug("\t\t %s" % str(t))

    import_error = False
    if __debug__:
        logger.debug("LOAD TASK:")
    try:
        # Try to import the module (for functions)
        if __debug__:
            logger.debug("\t- Trying to import the user module: %s" % path)
        module = import_user_module(path, logger)
    except ImportError:
        if __debug__:
            msg = "\t- Could not import the module. Reason: Method in class."
            logger.debug(msg)
        import_error = True

    if __debug__:
        logger.debug("EXECUTE TASK:")
    if not import_error:
        # Module method declared as task
        result = task_execution(logger, process_name, module, method_name,
                                time_out, types, values, compss_kwargs,
                                persistent_storage, storage_conf)
        exit_code = result[0]
        new_types = result[1]
        new_values = result[2]
        # Next result: target_direction = result[3]
        timed_out = result[4]
        except_msg = result[5]
    else:
        # Method declared as task in class
        # Not the path of a module, it ends with a class name
        class_name = path.split('.')[-1]
        if '.' in path:
            module_name = '.'.join(path.split('.')[0:-1])
        else:
            module_name = path
        try:
            module = __import__(module_name, fromlist=[class_name])
            klass = getattr(module, class_name)
        except Exception:  # noqa
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            exception_message = \
                "EXCEPTION IMPORTING MODULE IN %s\n" % process_name
            exception_message += ''.join(line for line in lines)
            logger.exception(exception_message)
            return 1, [], [], None, exception_message

        if __debug__:
            logger.debug("Method in class %s of module %s" % (class_name,
                                                              module_name))
            logger.debug("Has target: %s" % str(has_target))

        if has_target == 'true':
            # Instance method
            # The self object needs to be an object in order to call the
            # function. So, it can not be done in the @task decorator.
            # Since the args structure is parameters + self + returns we pop
            # the corresponding considering the return_length notified by the
            # runtime (-1 due to index starts from 0).
            self_index = num_params - return_length - 1
            self_elem = values.pop(self_index)
            self_type = types.pop(self_index)

            if self_type == parameter.TYPE.EXTERNAL_PSCO:
                if __debug__:
                    logger.debug("Last element (self) is a PSCO with id: %s" %
                                 str(self_elem.content))
                obj = get_by_id(self_elem.content)
            else:
                obj = None
                file_name = "None"
                if self_elem.content == "":
                    file_name = self_elem.file_name.original_path
                    if __debug__:
                        logger.debug("\t- Deserialize self from file.")
                    try:
                        obj = deserialize_from_file(file_name)
                    except Exception:  # noqa
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        exception_message = \
                            "EXCEPTION DESERIALIZING SELF IN %s\n" % \
                            process_name
                        exception_message += ''.join(line for line in lines)
                        logger.exception(exception_message)
                        return 1, [], [], None, exception_message
                    if __debug__:
                        logger.debug("Deserialized self object is: %s" %
                                     self_elem.content)
                        logger.debug("Processing callee, a hidden object of %s in file %s" %  # noqa: E501
                                     (type(self_elem.content), file_name))
            values.insert(0, obj)  # noqa
            if not self_type == parameter.TYPE.EXTERNAL_PSCO:
                types.insert(0, parameter.TYPE.OBJECT)
            else:
                types.insert(0, parameter.TYPE.EXTERNAL_PSCO)

            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]

            # Depending on the target_direction option, it is necessary to
            # serialize again self or not. Since this option is only visible
            # within the task decorator, the task_execution returns the value
            # of target_direction in order to know here if self has to be
            # serialized. This solution avoids using inspect.
            if target_direction is not None and \
                    (target_direction.direction == parameter.DIRECTION.INOUT or
                     target_direction.direction == parameter.DIRECTION.COMMUTATIVE):  # noqa: E501
                if is_psco(obj):
                    # There is no explicit update if self is a PSCO.
                    # Consequently, the changes on the PSCO must have been
                    # pushed into the storage automatically on each PSCO
                    # modification.
                    if __debug__:
                        logger.debug("The changes on the PSCO must have been"
                                     " automatically updated by the storage.")
                else:
                    if __debug__:
                        logger.debug("Serializing self (%r) to file: %s" %
                                     (obj, file_name))
                    try:
                        serialize_to_file(obj, file_name)
                    except Exception:  # noqa
                        # Catch any serialization exception
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           exc_traceback)
                        logger.exception("EXCEPTION SERIALIZING SELF IN %s" %
                                         process_name)
                        logger.exception(''.join(line for line in lines))
                        exit_code = 1
                    if __debug__:
                        logger.debug("Serialized successfully")
        else:
            # Class method - class is not included in values (e.g. values=[7])
            types.append(None)  # class must be first type
            result = task_execution(logger, process_name, klass, method_name,
                                    time_out, types, values, compss_kwargs,
                                    persistent_storage, storage_conf)
            exit_code = result[0]
            new_types = result[1]
            new_values = result[2]
            # Next return: target_direction = result[3]
            timed_out = result[4]
            except_msg = result[5]

    if __debug__:
        if exit_code != 0:
            logger.debug("EXECUTE TASK FAILED: Exit code: %s" %
                         str(exit_code))
        else:
            logger.debug("END TASK execution. Status: Ok")

    return int(exit_code), new_types, new_values, timed_out, except_msg

def _synchronize(obj, mode):
    # type: (object, int) -> object
    """ Synchronization function.

    This method retrieves the value of a future object.
    Calls the runtime in order to wait for the value and returns it when
    received.

    :param obj: Object to synchronize.
    :param mode: Direction of the object to synchronize.
    :return: The value of the object requested.
    """
    # TODO: Add a boolean to differentiate between files and object on the
    # COMPSs.open_file call. This change aims to obtain better traces.
    # Must be implemented first in the Runtime, then in the bindings common
    # C API and finally add the boolean here
    app_id = 0
    if is_psco(obj):
        obj_id = get_id(obj)
        if not OT_is_pending_to_synchronize(obj_id):
            return obj
        else:
            # file_path is of the form storage://pscoId or
            # file://sys_path_to_file
            file_path = COMPSs.open_file(app_id,
                                         "".join(("storage://",
                                                  str(obj_id))),
                                         mode)
            # TODO: Add switch on protocol (first parameter returned
            # currently ignored)
            _, file_name = file_path.split("://")
            new_obj = get_by_id(file_name)
            OT_stop_tracking(obj)
            return new_obj

    obj_id = OT_is_tracked(obj)
    if obj_id is None:
        # Not being tracked
        return obj
    if not OT_is_pending_to_synchronize(obj_id):
        return obj

    if __debug__:
        logger.debug("Synchronizing object %s with mode %s" % (obj_id, mode))

    file_name = OT_get_file_name(obj_id)
    compss_file = COMPSs.open_file(app_id, file_name, mode)

    # Runtime can return a path or a PSCOId
    if compss_file.startswith('/'):
        # If the real filename is null, then return None. The task that
        # produces the output file may have been ignored or cancelled, so its
        # result does not exist.
        real_file_name = compss_file.split('/')[-1]
        if real_file_name == "null":
            print("WARNING: Could not retrieve the object " + str(file_name) +
                  " since the task that produces it may have been IGNORED or"
                  " CANCELLED. Please, check the logs. Returning None.")
            return None
        new_obj = deserialize_from_file(compss_file)
        COMPSs.close_file(app_id, file_name, mode)
    else:
        new_obj = get_by_id(compss_file)

    if mode == 'r':
        OT_update_mapping(obj_id, new_obj)

    if mode != 'r':
        COMPSs.delete_file(app_id, OT_get_file_name(obj_id), False)
        OT_stop_tracking(obj)

    return new_obj

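# Minimal sketch of the dispatch _synchronize performs on the string that
# COMPSs.open_file returns (resolve_sync_result is a hypothetical helper;
# the rules are the ones implemented above): a leading '/' means the value
# was serialized to a file, a "null" basename means the producer task was
# ignored or cancelled, and anything else is treated as a PSCO id.
def resolve_sync_result(compss_file):
    if compss_file.startswith('/'):
        if compss_file.split('/')[-1] == "null":
            return None  # producer task ignored or cancelled
        return deserialize_from_file(compss_file)
    return get_by_id(compss_file)  # runtime returned a PSCO id
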
def retrieve_content(self, argument, name_prefix,
                     python_mpi, collections_layouts,
                     depth=0):
    # type: (Parameter, str, bool, list, int) -> None
    """ Retrieve the content of a particular argument.

    :param argument: Argument.
    :param name_prefix: Name prefix.
    :param python_mpi: If the task is python MPI.
    :param collections_layouts: Layouts of collections params for python
                                MPI tasks.
    :param depth: Collection depth (0 if not a collection).
    :return: None
    """
    if __debug__:
        logger.debug("\t - Revealing: " + str(argument.name))
    # This case is special, as a FILE can actually mean a FILE or an
    # object that is serialized in a file
    if is_vararg(argument.name):
        self.param_varargs = argument.name
        if __debug__:
            logger.debug("\t\t - It is vararg")

    content_type = argument.content_type
    type_file = parameter.TYPE.FILE
    type_directory = parameter.TYPE.DIRECTORY
    type_external_stream = parameter.TYPE.EXTERNAL_STREAM
    type_collection = parameter.TYPE.COLLECTION
    type_external_psco = parameter.TYPE.EXTERNAL_PSCO

    if content_type == type_file:
        if self.is_parameter_an_object(argument.name):
            # The object is stored in some file, load and deserialize
            f_name = argument.file_name.split(':')[-1]
            if __debug__:
                logger.debug("\t\t - It is an OBJECT. Deserializing from"
                             " file: " + str(f_name))
            argument.content = deserialize_from_file(f_name)
            if __debug__:
                logger.debug("\t\t - Deserialization finished")
        else:
            # The object is a FILE, just forward the path of the file
            # as a string parameter
            argument.content = argument.file_name.split(':')[-1]
            if __debug__:
                logger.debug("\t\t - It is FILE: " + str(argument.content))
    elif content_type == type_directory:
        if __debug__:
            logger.debug("\t\t - It is a DIRECTORY")
        argument.content = argument.file_name.split(":")[-1]
    elif content_type == type_external_stream:
        if __debug__:
            logger.debug("\t\t - It is an EXTERNAL STREAM")
        argument.content = deserialize_from_file(argument.file_name)
    elif content_type == type_collection:
        argument.content = []
        # This field is exclusive for COLLECTION_T parameters, so make
        # sure you have checked this parameter is a collection before
        # consulting it
        argument.collection_content = []
        col_f_name = argument.file_name.split(':')[-1]

        # maybe it is an inner-collection..
        _dec_arg = self.decorator_arguments.get(argument.name, None)
        _col_dir = _dec_arg.direction if _dec_arg else None
        _col_dep = _dec_arg.depth if _dec_arg else depth
        if __debug__:
            logger.debug("\t\t - It is a COLLECTION: " + str(col_f_name))
            logger.debug("\t\t\t - Depth: " + str(_col_dep))

        # Check if this collection is in layout
        # Three conditions:
        # 1- this is a mpi task
        # 2- it has a collection layout
        # 3- the current argument is the layout target
        in_mpi_collection_env = False
        if python_mpi and collections_layouts and \
                collections_layouts[0] == argument.name:
            in_mpi_collection_env = True
            from pycompss.util.mpi.helper import rank_distributor
            # Call rank_distributor if the current param is the target of
            # the layout for each rank, return its offset(s) in the
            # collection.
            rank_distribution = rank_distributor(collections_layouts[1:])
            rank_distr_len = len(rank_distribution)
            if __debug__:
                logger.debug("Rank distribution is: " +
                             str(rank_distribution))

        for (i, line) in enumerate(open(col_f_name, 'r')):
            if in_mpi_collection_env:
                # Isn't this my offset? skip
                if i not in rank_distribution:
                    continue
            data_type, content_file, content_type = line.strip().split()
            # Same naming convention as in COMPSsRuntimeImpl.java
            sub_name = "%s.%d" % (argument.name, i)
            if name_prefix:
                sub_name = "%s.%s" % (name_prefix, argument.name)
            else:
                sub_name = "@%s" % sub_name

            if __debug__:
                logger.debug("\t\t\t - Revealing element: " + str(sub_name))

            if not self.is_parameter_file_collection(argument.name):
                sub_arg, _ = build_task_parameter(int(data_type),
                                                  parameter.IOSTREAM.UNSPECIFIED,  # noqa: E501
                                                  "",
                                                  sub_name,
                                                  content_file,
                                                  argument.content_type)
                # if direction of the collection is 'out', it means we
                # haven't received serialized objects from the Master
                # (even though parameters have 'file_name', those files
                # haven't been created yet). plus, inner collections of
                # col_out params do NOT have 'direction', we identify
                # them by 'depth'..
                if _col_dir == parameter.DIRECTION.OUT or \
                        ((_col_dir is None) and _col_dep > 0):
                    # if we are at the last level of COL_OUT param,
                    # create 'empty' instances of elements
                    if _col_dep == 1:
                        temp = create_object_by_con_type(content_type)
                        sub_arg.content = temp
                        # In case that only one element is used in this
                        # mpi rank, the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                    else:
                        self.retrieve_content(sub_arg, sub_name,
                                              python_mpi,
                                              collections_layouts,
                                              depth=_col_dep - 1)
                        # In case that only one element is used in this
                        # mpi rank, the collection list is removed
                        if in_mpi_collection_env and rank_distr_len == 1:
                            argument.content = sub_arg.content
                            argument.content_type = sub_arg.content_type
                        else:
                            argument.content.append(sub_arg.content)
                            argument.collection_content.append(sub_arg)
                else:
                    # Recursively call the retrieve method, fill the
                    # content field in our new taskParameter object
                    self.retrieve_content(sub_arg, sub_name, python_mpi,
                                          collections_layouts)
                    # In case only one element is used in this mpi rank,
                    # the collection list is removed
                    if in_mpi_collection_env and rank_distr_len == 1:
                        argument.content = sub_arg.content
                        argument.content_type = sub_arg.content_type
                    else:
                        argument.content.append(sub_arg.content)
                        argument.collection_content.append(sub_arg)
            else:
                # In case only one element is used in this mpi rank,
                # the collection list is removed
                if in_mpi_collection_env and rank_distr_len == 1:
                    argument.content = content_file
                    argument.content_type = parameter.TYPE.FILE
                else:
                    argument.content.append(content_file)
                    argument.collection_content.append(content_file)
    elif not self.storage_supports_pipelining() and \
            content_type == type_external_psco:
        if __debug__:
            logger.debug("\t\t - It is a PSCO")
        # The object is a PSCO and the storage does not support
        # pipelining, do a single getByID of the PSCO
        from storage.api import getByID  # noqa
        argument.content = getByID(argument.content)

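# Hedged illustration of the collection file parsed in the COLLECTION branch
# above: each line holds three space-separated fields, the COMPSs data type
# code, the file containing the element, and the element's content type.
# The concrete codes, paths and type names below are made up for
# illustration only; _example_parse_collection_file is hypothetical.
def _example_parse_collection_file():
    example_lines = [
        "9 /tmp/d1v1_0001.IT builtins.int",    # element 0 (hypothetical)
        "9 /tmp/d1v2_0001.IT builtins.float",  # element 1 (hypothetical)
    ]
    for i, line in enumerate(example_lines):
        data_type, content_file, content_type = line.strip().split()
        print(i, data_type, content_file, content_type)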