def test_same_input_than(self):
    """
    Check ``same_input_than`` on wrappers sharing identical / different inputs.

    Wrappers 1 and 2 get a file and a table built with the same timestamp and
    must compare equal; wrapper 3 gets a file stamped later and must differ.
    """
    first_millis, first_human = get_current_time()

    def new_table():
        # Table input registered against a modification-time entry stamped
        # with the first timestamp.
        table = TableInputOutputInformation(model_py_path="FooBase",
                                            table_key="FooBase",
                                            table_name="FooBase")
        table.model_declarative_meta = FooBase
        table.relation_file_or_tableioinfo_to_typeio = self.input_entry
        modif = TableModificationTime(table_name="FooBase",
                                      mtime_epoch_millis=first_millis,
                                      mtime_human=first_human)
        modif.relation_tablemodiftime_to_tableioinfo.append(table)
        return table

    def new_file(millis):
        # File input "input1" with the given modification time.
        file_info = FileInputOutputInformation(file_key="input1",
                                               path="path1",
                                               mtime_epoch_millis=millis,
                                               size=0)
        file_info.relation_file_or_tableioinfo_to_typeio = self.input_entry
        return file_info

    def new_wrapper(file_info, table):
        wrapper = FooWrapper2(rule_name="rule1")
        wrapper.relation_toolwrapper_to_fileioinfo.append(file_info)
        wrapper.relation_toolwrapper_to_tableioinfo.append(table)
        return wrapper

    table_a = new_table()
    table_b = new_table()
    file_a = new_file(first_millis)
    file_b = new_file(first_millis)
    wrapper_a = new_wrapper(file_a, table_a)
    wrapper_b = new_wrapper(file_b, table_b)

    table_c = new_table()
    # Make sure the second timestamp is strictly later than the first one.
    time.sleep(0.05)
    second_millis, _second_human = get_current_time()
    file_c = new_file(second_millis)
    wrapper_c = new_wrapper(file_c, table_c)

    self.assertTrue(wrapper_a.same_input_than(wrapper_b))
    self.assertFalse(wrapper_a.same_input_than(wrapper_c))
def test_is_output_ok(self):
    """
    Build two wrappers around a freshly touched output file and a table input.

    NOTE(review): this test only builds the fixtures; it contains no
    assertion -- confirm whether the checks were dropped on purpose.
    """
    path_f1 = os.path.join(self.test_path, "outdir/path1")

    def new_table():
        # Table input stamped with the current time, attached to a matching
        # modification-time entry.
        millis, human = get_current_time()
        table = TableInputOutputInformation(model_py_path="FooBase",
                                            table_key="FooBase",
                                            table_name="FooBase")
        table.model_declarative_meta = FooBase
        table.relation_file_or_tableioinfo_to_typeio = self.input_entry
        table.mtime_epoch_millis = millis
        modif = TableModificationTime(table_name="FooBase",
                                      mtime_epoch_millis=millis,
                                      mtime_human=human)
        modif.relation_tablemodiftime_to_tableioinfo.append(table)
        return table

    def new_output_file():
        # Output file entry carrying the on-disk mtime and size of path_f1.
        file_info = FileInputOutputInformation(
            file_key="input1",
            path=path_f1,
            mtime_epoch_millis=get_mtime(path_f1),
            size=os.path.getsize(path_f1))
        file_info.relation_file_or_tableioinfo_to_typeio = self.output_entry
        return file_info

    def new_wrapper(file_info, table):
        wrapper = FooWrapper2(rule_name="rule1")
        wrapper.relation_toolwrapper_to_fileioinfo.append(file_info)
        wrapper.relation_toolwrapper_to_tableioinfo.append(table)
        return wrapper

    first_table = new_table()
    # Touch the file two seconds after the table timestamp was taken.
    time.sleep(2)
    subprocess.Popen(["touch", path_f1]).wait()
    toolwrapper1 = new_wrapper(new_output_file(), first_table)

    second_file = new_output_file()
    second_table = new_table()
    toolwrapper2 = new_wrapper(second_file, second_table)
def test_run_rerun_runtime(self):
    """
    Compare wall-clock durations of three consecutive runs of one workflow.

    The second run must take less than 1.5x the first one; after deleting
    ``outdir/output_file1.txt`` the third run must take longer than the
    second one.
    """
    cmd_line = [
        "python", "-D", self.__db_url, "-w", self.__example_def_file1, "-v"
    ]

    def timed_run():
        # Run the workflow (which ends with SystemExit) and return the
        # elapsed time in the unit returned by get_current_time().
        started_at, _ = get_current_time()
        with self.assertRaises(SystemExit):
            WopMars().run(cmd_line)
        ended_at, _ = get_current_time()
        return ended_at - started_at

    # Slow run
    runtime1 = timed_run()

    # Fast run: run2 < run1
    runtime2 = timed_run()
    self.assertGreater(runtime1 * 1.5, runtime2)

    # Middle run: run3 > run2
    PathManager.unlink("outdir/output_file1.txt")
    runtime3 = timed_run()
    self.assertLess(runtime2, runtime3)
def test_run_skipping_steps_time_check(self):
    """
    Run the files-only workflow twice and check the rerun is not slower.

    The second run must take less than 1.5x the first one. The file
    ``outdir/output_file1.txt`` is removed afterwards.
    """
    cmd_line = [
        "python", "-D", self.__db_url, "-w",
        self.__example_def_file2_only_files, "-v"
    ]

    def timed_run():
        # Run the workflow (which ends with SystemExit) and return the
        # elapsed time in the unit returned by get_current_time().
        started_at, _ = get_current_time()
        with self.assertRaises(SystemExit):
            WopMars().run(cmd_line)
        ended_at, _ = get_current_time()
        return ended_at - started_at

    runtime1 = timed_run()
    runtime2 = timed_run()
    self.assertGreater(runtime1 * 1.5, runtime2)

    PathManager.unlink("outdir/output_file1.txt")
def run_queue(self):
    """
    Drain the execution queue and start every tool that can be started.

    Each tool taken from the queue falls into one of three cases:

    - its predecessors have not all run: it is only logged and not buffered;
    - its inputs are ready, or ``--dry-run`` is set: it is started. It is
      started "dry" (simulated) when neither ``--forceall`` nor ``--touch``
      is set, the tool was already executed, and no predecessor has a status
      other than ``EXECUTED`` / ``ALREADY_EXECUTED``;
    - otherwise: it is appended to the buffer of not-ready tools.

    Once the queue is empty and no tool is counted as executing, the
    workflow is either finished (empty buffer: finishing information is
    stored, the session is closed and the process exits with status 0) or
    stuck (``check_buffer()`` is falsy: an error is recorded and raised).

    :raises WopMarsException: No tool is running, the buffer is not empty
        and ``check_buffer()`` returned a falsy value.
    """
    # TODO LucG: this method is not thread-safe (maybe it is, to be checked).

    ################################################################################################################
    #
    # Main while
    # If no tools have been added to the queue:
    #  - All tools have been executed and the queue is empty, so nothing happens
    #  - There were remaining tools in the queue but they weren't ready, so they are tested again
    #
    ################################################################################################################
    while not self.__queue_exec.empty():
        Logger.instance().debug("Queue size: " + str(self.__queue_exec.qsize()))
        Logger.instance().debug("Queue content: " + str([
            "rule: " + tt.get_toolwrapper().rule_name + "->" + tt.get_toolwrapper().tool_python_path
            for tt in self.__queue_exec.get_queue_tuple()
        ]))

        ############################################################################################################
        #
        # get the first element of the queue to execute
        #
        ############################################################################################################
        tool_wrapper_thread = self.__queue_exec.get()
        tool_wrapper = tool_wrapper_thread.get_toolwrapper()

        Logger.instance().debug("Current rule: " + tool_wrapper.rule_name + "->" + tool_wrapper.tool_python_path)
        # check if the predecessors of a rule have been already executed: a rule shouldn't be executed if
        # its predecessors have not been executed yet.
        # In that case the tool is only logged here: it is neither started nor buffered.
        if not self.all_predecessors_have_run(tool_wrapper):
            Logger.instance().debug("Predecessors of rule: " + tool_wrapper.rule_name + " have not been executed yet.")

        ############################################################################################################
        #
        # Ready for running, either inputs are ready or dry-run mode is enabled
        #
        ############################################################################################################
        elif tool_wrapper.are_inputs_ready() or OptionManager.instance()["--dry-run"]:
            # the state of inputs (table and file) are set in the db here.
            tool_wrapper.set_args_time_and_size(1)
            Logger.instance().debug("ToolWrapper ready: " + tool_wrapper.tool_python_path)
            dry = False

            ########################################################################################################
            #
            # Will set to dry (i.e. will not execute) if all these conditions are true
            # - neither --forceall nor --touch is set
            # - tool already executed previously
            # - every predecessor has status EXECUTED or ALREADY_EXECUTED
            #
            ########################################################################################################
            if not OptionManager.instance()["--forceall"] and not OptionManager.instance()["--touch"]:
                if self.is_this_tool_wrapper_already_executed(tool_wrapper):
                    # The list below collects the predecessors that are NOT in an executed state;
                    # the tool goes dry only when that list is empty.
                    if not bool([
                            tool_wrapper_predecessor
                            for tool_wrapper_predecessor in self.__dag_to_exec.predecessors(tool_wrapper)
                            if tool_wrapper_predecessor.status != "EXECUTED"
                            and tool_wrapper_predecessor.status != "ALREADY_EXECUTED"
                    ]):
                        Logger.instance().info(
                            "ToolWrapper: {} -> {} seems to have already been run with same parameters.".format(
                                tool_wrapper.rule_name, tool_wrapper.tool_python_path))
                        dry = True

            # TODO LucG: ToolWrapperThread resource verification
            tool_wrapper_thread.subscribe(self)
            # One more tool is counted as executing; this counter is tested after the loop.
            self.__count_exec += 1
            # TODO LucG: ToolWrapperThread start method
            tool_wrapper_thread.set_dry(dry)
            try:
                # be careful here: the execution of the tool threads is recursive meaning that calls to function may
                # be stacked (run -> notify success -> run(next tool) -> notify success(next tool) -> etc....
                # TODO LucG: ToolWrapperThread start method
                tool_wrapper_thread.run()
            except Exception as e:
                # as mentioned above, there may be recursive calls to this function, so every exception can
                # pass here multiple times: this attribute is used for recognizing exception that have already been
                # caught, so the ERROR status is stored only once
                if not hasattr(e, "teb_already_seen"):
                    setattr(e, "teb_already_seen", True)
                    tool_wrapper.set_execution_infos(status="ERROR")
                    self.__session.add(tool_wrapper)
                    self.__session.commit()
                # Always propagate, whether or not the exception had been seen before.
                raise e
        else:
            Logger.instance().debug("ToolWrapper not ready: rule: " + tool_wrapper.rule_name + " -> " + str(tool_wrapper.tool_python_path))
            # The buffer contains the ToolWrappers that have inputs which are not ready yet.
            self.__list_queue_buffer.append(tool_wrapper_thread)

    Logger.instance().debug("Buffer: " + str([
        "rule: " + t.get_toolwrapper().rule_name + "->" + t.get_toolwrapper().tool_python_path
        for t in self.__list_queue_buffer
    ]))
    Logger.instance().debug("Running rules: " + str(self.__count_exec))

    # There is no more ToolWrapper that are waiting to be executed.
    # Is there some tools that are currently being executed?
    if self.__count_exec == 0:
        # Is there some tools that weren't ready?
        finish_epoch_millis_unix_ms, finish_epoch_millis_datetime = get_current_time()
        if len(self.__list_queue_buffer) == 0:
            # If there is no tool waiting and no tool being executed, the workflow has finished.
            Logger.instance().info(
                "The workflow has completed. Finished at: {}".format(finish_epoch_millis_datetime))
            self.set_finishing_informations(finish_epoch_millis_datetime, "FINISHED")
            SQLManager.instance().get_session().close()
            sys.exit(0)
        # only in a multi-threaded environment
        elif not self.check_buffer():
            # If there is no tool being executed but there is that are waiting something, the workflow has an issue
            tw_list = [t.get_toolwrapper() for t in self.__list_queue_buffer]
            if len(tw_list) > 0:
                # Only the first buffered tool and its first not-ready input are reported.
                input_files_not_ready = tw_list[0].get_input_files_not_ready()
                self.set_finishing_informations(finish_epoch_millis_datetime, "ERROR")
                raise WopMarsException(
                    "The workflow has failed.",
                    " The inputs '{}' have failed for this tool '{}'".format(
                        input_files_not_ready[0], tw_list[0].rule_name))
def run(argv):
    """
    Entry-point of the program.

    Parses and validates the command line, handles the options that exit
    immediately (``--version``, ``example``, ``--cleanup-metadata``) and
    otherwise runs the workflow.

    Exit statuses raised through ``sys.exit``:

    - ``2``: docopt rejected the command line, or schema validation failed;
    - ``0``: ``--version``, ``example`` or ``--cleanup-metadata`` was handled;
    - ``1``: the workflow run raised an exception.

    :param argv: Full command line, program name first. It is not modified.
    :type argv: list
    """
    # With no argument at all, show the help. The caller's list is left
    # untouched (it used to get '-h' appended in place).
    cli_args = argv[1:] or ['-h']

    # if the command line is malformed, docopt interrupt the software.
    try:
        OptionManager.instance().update(docopt(__doc__, argv=cli_args))
    except DocoptExit as SE:
        print("Bad argument in the command line: \n\t" + " ".join(argv) +
              "\n" + str(SE))
        sys.exit(2)

    try:
        schema_option = Schema({
            '--wopfile': Or("Wopfile.yml", str),
            '--database': Use(PathManager.check_database_valid_url),
            '-v': Or(0, And(int, lambda n: 1 <= n <= 2)),
            '--dot': Or(
                None,
                And(Use(PathManager.check_valid_path),
                    Use(PathManager.check_pygraphviz))),
            "--log": Use(PathManager.check_valid_path),
            "--since": Or(None, str),
            "--until": Or(None, str),
            "--forceall": Use(bool),
            "--dry-run": Use(bool),
            "--touch": Use(bool),
            # Validation has the side effect of creating the directory.
            "--directory": Use(lambda path: pathlib.Path(path).mkdir(
                parents=True, exist_ok=True)),
            "--input": Use(DictUtils.str_to_dict),
            "--output": Use(DictUtils.str_to_dict),
            "--params": Use(DictUtils.str_to_dict),
            "TOOLWRAPPER": Or(None, Use(PathManager.is_in_python_path)),
            "tool": Use(bool),
            "example": Use(bool),
            "--version": Use(bool),
            "--cleanup-metadata": Use(bool),
        })
        # The option values are validated using schema library
        OptionManager.instance().validate(schema_option)
        os.chdir(OptionManager.instance()["--directory"])
    except SchemaError as schema_msg:
        Logger.instance().debug("\nCommand line Args:" +
                                str(OptionManager.instance()))
        schema_text = str(schema_msg)
        # regex for the different possible error messages.
        match_open_def = re.match(r"^open\('(.[^\)]+)'\)", schema_text)
        match_dot_def = re.match(r"^check_valid_path\(('.[^\)]+')\)",
                                 schema_text)
        match_wrong_key = re.match(r"^Wrong keys ('.[^\)]+')", schema_text)
        match_pygraphviz = re.match(r".*dot.*", schema_text)
        # Check the different regex, most specific first.
        if match_open_def:
            Logger.instance().error("The file " + match_open_def.group(1) +
                                    " cannot be opened. It may not exist.")
        elif match_dot_def:
            Logger.instance().error("The path " + match_dot_def.group(1) +
                                    " is not valid.")
        elif match_wrong_key:
            # Normally never reach
            Logger.instance().error("The option key " +
                                    match_wrong_key.group(1) +
                                    " is not known.")
        elif match_pygraphviz:
            Logger.instance().error(
                "The dot file path is not valid or the pygraphviz module is not installed. In the second case, install wopmars with pygraphviz: pip install wopmars[pygraphviz]"
            )
        else:
            # Normally never reach
            Logger.instance().error(
                "An unknown error has occured. Message: " + schema_text)
        sys.exit(2)

    Logger.instance().debug("\nCommand line Args:" +
                            str(OptionManager.instance()))

    ############################################################################################
    #
    # Print version to stdout and exits
    #
    ############################################################################################
    if OptionManager.instance()["--version"]:
        print("wopmars {}".format(__version__), file=sys.stdout)
        sys.exit(0)

    ############################################################################################
    #
    # Recursively writes quickstart example and exits
    #
    ############################################################################################
    if OptionManager.instance()["example"]:
        source_path = os.path.join(PathManager.get_package_path(),
                                   "data/example")
        destination_path = os.path.join("example")
        # Any existing "example" directory is replaced.
        shutil.rmtree(destination_path, ignore_errors=True)
        shutil.copytree(source_path, destination_path)
        sys.exit(0)

    ############################################################################################
    #
    # Initiates new WorkflowManager instance
    #
    ############################################################################################
    workflow_manager = WorkflowManager()

    ############################################################################################
    #
    # Cleans up non fully terminated executions
    #
    ############################################################################################
    SQLManager.instance().clean_up_unexecuted_tool_wrappers()

    ############################################################################################
    #
    # --cleanup-metadata (clear history and exit)
    #
    ############################################################################################
    if OptionManager.instance()["--cleanup-metadata"]:
        Logger.instance().info("Deleting Wopmars history...")
        # The history is cleared only if the configured database path is an existing file
        if pathlib.Path(
                SQLManager.instance().d_database_config['db_database']
        ).is_file():
            SQLManager.instance().clear_wopmars_history()
        sys.exit(0)

    try:
        workflow_manager.run()
    except WopMarsException as WE:
        Logger.instance().error(str(WE))
        try:
            _, timestamp_human = get_current_time()
            Logger.instance().error(
                "The workflow has encountered an error at: {}".format(
                    timestamp_human))
            workflow_manager.set_finishing_informations(
                timestamp_human, "ERROR")
        except AttributeError:
            SQLManager.instance().get_session().rollback()
            Logger.instance().error(
                "The execution has not even begun. No informations will be stored in the database."
            )
        except Exception as e:
            Logger.instance().error(
                "An error occurred during the rollback of the changement of the database which can be now unstable:"
                + str(e))
        sys.exit(1)
    except Exception as e:
        Logger.instance().error("An unknown error has occurred:\n" + str(e))
        sys.exit(1)
def create_tool_wrapper_inst(self, rule_name, tool_python_path,
                             dict_dict_dict_elm, input_entry, output_entry):
    """
    Import the tool wrapper class and build an instance wired to its I/O.

    The module ``tool_python_path`` is imported and the class named after
    its last dotted component is instantiated with ``rule_name``. Every
    file, table and option found in ``dict_dict_dict_elm`` is then attached
    to the instance.

    The session is committed before each table entry is processed, so
    pending modifications of the scoped session are committed by this
    method.

    :param rule_name: Name of the rule in which the tool wrapper is used.
    :type rule_name: str
    :param tool_python_path: Dotted path of the module holding the tool
        wrapper; its last component is also the class name.
    :type tool_python_path: str
    :param dict_dict_dict_elm: Entries under the keys ``"dict_input"``,
        ``"dict_output"`` (each split into ``"file"`` and ``"table"``) and
        ``"dict_params"``.
    :type dict_dict_dict_elm: dict(dict(dict()))
    :param input_entry: Type entry assigned to every input file / table.
    :type input_entry: :class:`wopmars.framework.bdd.models.TypeInputOrOutput.TypeInputOrOutput`
    :param output_entry: Type entry assigned to every output file / table.
    :type output_entry: :class:`wopmars.framework.bdd.models.TypeInputOrOutput.TypeInputOrOutput`

    :raises WopMarsException: The module cannot be imported, the class does
        not exist in it, or attaching an entry raised ObjectDeletedError.
    :return: ToolWrapper instance
    """
    session = SQLManager.instance().get_session()

    # Importing the module, then looking the class up by name. getattr is
    # used instead of eval: a missing class still raises AttributeError,
    # but the configured name is never executed as code.
    try:
        mod = importlib.import_module(tool_python_path)
        ToolWrapper_class = getattr(mod, tool_python_path.split('.')[-1])
    except AttributeError as e:
        raise WopMarsException(
            "Error while parsing the configuration file: \n\t",
            "The class " + tool_python_path + " doesn't exist.") from e
    except ImportError as IE:
        if tool_python_path in str(IE):
            raise WopMarsException(
                "Error while parsing the configuration file:",
                tool_python_path +
                " module is not in the pythonpath. ") from IE
        raise WopMarsException(
            "Error while parsing the configuration file:",
            tool_python_path + " module contains an ImportError: " +
            str(IE)) from IE

    # Initialize the instance of the user ToolWrapper
    tool_wrapper_inst = ToolWrapper_class(rule_name=rule_name)

    declaration_error = (
        "Error in the tool_python_path class declaration. Please, notice the developer"
    )
    polymorphic_hint = (
        "The error is probably caused by the lack of the 'polymorphic_identity' attribute"
        " in the tool_python_path. Error message: \n")

    # Inputs first, then outputs: both are attached the same way, only the
    # type entry differs.
    for dict_key, type_entry in (("dict_input", input_entry),
                                 ("dict_output", output_entry)):
        for elm, entries in dict_dict_dict_elm[dict_key].items():
            if elm == "file":
                for iofileput_entry in entries.values():
                    # set the type of FileInputOutputInformation object
                    iofileput_entry.relation_file_or_tableioinfo_to_typeio = type_entry
                    try:
                        # associating file and tool wrapper
                        tool_wrapper_inst.relation_toolwrapper_to_fileioinfo.append(
                            iofileput_entry)
                    except ObjectDeletedError as e:
                        raise WopMarsException(
                            declaration_error,
                            polymorphic_hint + str(e)) from e
            elif elm == "table":
                for iodbput_entry in entries.values():
                    # Preventive commit: the next statements may insert a
                    # row and the session has to be clean before that.
                    session.commit()
                    # Fetch or create the modification-time row of the
                    # table, stamped with the current time when created.
                    time_unix_ms, time_human = get_current_time()
                    model_py_path_suffix = iodbput_entry.model_py_path.split(
                        '.')[-1]
                    modification_table_entry, _ = session.get_or_create(
                        TableModificationTime,
                        defaults={
                            "mtime_epoch_millis": time_unix_ms,
                            "mtime_human": time_human
                        },
                        table_name=model_py_path_suffix)
                    iodbput_entry.relation_tableioinfo_to_tablemodiftime = modification_table_entry
                    iodbput_entry.relation_file_or_tableioinfo_to_typeio = type_entry
                    try:
                        tool_wrapper_inst.relation_toolwrapper_to_tableioinfo.append(
                            iodbput_entry)
                    except ObjectDeletedError as e:
                        raise WopMarsException(
                            declaration_error,
                            polymorphic_hint + str(e)) from e

    # associating options and tool wrapper
    for option_entry in dict_dict_dict_elm["dict_params"].values():
        tool_wrapper_inst.relation_toolwrapper_to_option.append(option_entry)

    return tool_wrapper_inst
def iterate_wopfile_yml_dic_and_insert_rules_in_db(self, wopfile_path):
    """
    Load the definition file and store the rules of the workflow in the database.

    One tool wrapper instance is created per rule block, all of them
    attached to a single new Execution entry. The definition file is
    assumed to be well formed; ``is_content_respected()`` is called on every
    tool wrapper once everything has been committed.

    :param wopfile_path: Path to the definition file.
    :type wopfile_path: str

    :raise: WopMarsException: The input / output type entries are not in
        the database.
    """
    self.load_wopfile_as_yml_dic(wopfile_path)

    session = SQLManager.instance().get_session()

    try:
        # One execution entry shared by every tool wrapper of the workflow.
        time_unix_ms, time_human = get_current_time()
        execution = Execution(started_at=time_human)
        # The type entries must already exist in the database.
        input_entry = session.query(TypeInputOrOutput).filter(
            TypeInputOrOutput.is_input == True).one()
        output_entry = session.query(TypeInputOrOutput).filter(
            TypeInputOrOutput.is_input == False).one()

        tool_wrapper_set = set()
        for rule_header in self.__wopfile_yml_dict:
            rule_body = self.__wopfile_yml_dict[rule_header]
            tool_wrapper_py_path = None
            # The rule name is the last word of the header, without ":".
            rule_name_str = rule_header.split()[-1].strip(":")
            Logger.instance().debug(
                "Encounter rule " + rule_name_str + ": \n" +
                str(DictUtils.pretty_repr(rule_body)))
            # Fresh containers for each rule.
            tool_wrapper_inst_dic = {
                "dict_input": {"file": {}, "table": {}},
                "dict_params": {},
                "dict_output": {"file": {}, "table": {}},
            }

            for section_name in rule_body:
                if section_name not in {"input", "output", "params"}:
                    # Anything else is taken as the "tool" line.
                    tool_wrapper_py_path = rule_body[section_name]
                    continue

                section = rule_body[section_name]
                for entry_key in section:
                    if section_name == "params":
                        option_value = section[entry_key]
                        tool_wrapper_inst_dic["dict_params"][
                            entry_key] = Option(name=entry_key,
                                                value=option_value)
                        continue

                    # Here entry_key is "file" or "table".
                    for io_key in section[entry_key]:
                        file_or_table_inst = None
                        if entry_key == "file":
                            file_or_table_inst = FileInputOutputInformation(
                                file_key=io_key,
                                path=section[entry_key][io_key])
                        elif entry_key == "table":
                            model_py_path = section[entry_key][io_key]
                            table_name = model_py_path.split('.')[-1]
                            file_or_table_inst = TableInputOutputInformation(
                                model_py_path=model_py_path,
                                table_key=io_key,
                                table_name=table_name)

                        # section_name is "input" or "output" here.
                        tool_wrapper_inst_dic[
                            "dict_" + section_name][entry_key][
                                io_key] = file_or_table_inst
                        Logger.instance().debug("Object " + section_name +
                                                " " + entry_key + ": " +
                                                io_key + " created.")

            # Build the tool wrapper of this rule and tie it to the execution.
            tool_wrapper_inst = self.create_tool_wrapper_inst(
                rule_name_str, tool_wrapper_py_path, tool_wrapper_inst_dic,
                input_entry, output_entry)
            tool_wrapper_inst.relation_toolwrapper_to_execution = execution
            tool_wrapper_set.add(tool_wrapper_inst)
            Logger.instance().debug("Instance tool_python_path: " +
                                    tool_wrapper_py_path + " created.")
            # commit/rollback pair used to clean the session.
            session.commit()
            session.rollback()

        # Table properties are set once, after all rules, on the whole set
        # of execution tables.
        TableInputOutputInformation.set_tables_properties(
            TableInputOutputInformation.get_execution_tables())
        session.commit()
        session.rollback()
        TableModificationTime.create_triggers()
        SQLManager.instance().create_all()
        session.add_all(tool_wrapper_set)
        # save all operations done so far.
        session.commit()
        for tool_wrapper in tool_wrapper_set:
            tool_wrapper.is_content_respected()

    except NoResultFound as e:
        session.rollback()
        raise WopMarsException(
            "Error while parsing the configuration file. The database has not been setUp Correctly.",
            str(e))
def load_one_toolwrapper(self, s_toolwrapper, s_dict_inputs, s_dict_outputs,
                         s_dict_params):
    """
    Method called when the ``tool`` command is used. It is equivalent to the
    :meth:`~.wopmars.framework.parsing.Reader.Reader.iterate_wopfile_yml_dic_and_insert_rules_in_db`
    method but creates a workflow with only one tool wrapper, named
    ``"rule_" + s_toolwrapper``. The workflow is also stored inside the
    database.

    Input file paths are joined to the ``--directory`` option; output file
    paths are used as given.

    :param s_toolwrapper: The python path of the tool wrapper (will be imported)
    :type s_toolwrapper: str
    :param s_dict_inputs: A string containing the dict of inputs
    :type s_dict_inputs: str
    :param s_dict_outputs: A string containing the dict of outputs
    :type s_dict_outputs: str
    :param s_dict_params: A string containing the dict of params
    :type s_dict_params: str

    :raise WopMarsException: The input / output type entries are not in the
        database.
    """
    session = SQLManager.instance().get_session()
    # NOTE(review): eval() executes whatever expression these strings hold.
    # They presumably come from the command line -- confirm, and consider
    # ast.literal_eval if only literal dicts are expected.
    dict_inputs = dict(eval(s_dict_inputs))
    dict_outputs = dict(eval(s_dict_outputs))
    dict_params = dict(eval(s_dict_params))
    try:
        # The same execution entry for the whole workflow-related database entries.
        time_unix_ms, time_human = get_current_time()
        execution = Execution(started_at=time_human)
        # get the types that should have been created previously
        input_entry = session.query(TypeInputOrOutput).filter(
            TypeInputOrOutput.is_input == True).one()
        output_entry = session.query(TypeInputOrOutput).filter(
            TypeInputOrOutput.is_input == False).one()

        Logger.instance().debug("Loading unique tool_python_path " +
                                s_toolwrapper)
        dict_dict_dict_elm = dict(dict_input={"file": {}, "table": {}},
                                  dict_params={},
                                  dict_output={"file": {}, "table": {}})

        # The loop variable is named io_kind so that the builtin `type` is
        # not shadowed.
        for io_kind in dict_inputs:
            if io_kind == "file":
                for s_input in dict_inputs[io_kind]:
                    obj_created = FileInputOutputInformation(
                        file_key=s_input,
                        path=os.path.join(
                            OptionManager.instance()["--directory"],
                            dict_inputs[io_kind][s_input]))
                    dict_dict_dict_elm["dict_input"]["file"][
                        s_input] = obj_created
                    Logger.instance().debug("Object input file: " +
                                            s_input + " created.")
            elif io_kind == "table":
                for s_input in dict_inputs[io_kind]:
                    model_py_path = dict_inputs[io_kind][s_input]
                    table_name = model_py_path.split('.')[-1]
                    obj_created = TableInputOutputInformation(
                        model_py_path=model_py_path,
                        table_key=s_input,
                        table_name=table_name)
                    dict_dict_dict_elm["dict_input"]["table"][
                        s_input] = obj_created
                    Logger.instance().debug("Object input table: " +
                                            s_input + " created.")

        for io_kind in dict_outputs:
            if io_kind == "file":
                for s_output in dict_outputs[io_kind]:
                    obj_created = FileInputOutputInformation(
                        file_key=s_output,
                        path=dict_outputs[io_kind][s_output])
                    dict_dict_dict_elm["dict_output"]["file"][
                        s_output] = obj_created
                    Logger.instance().debug("Object output file: " +
                                            s_output + " created.")
            elif io_kind == "table":
                for s_output in dict_outputs[io_kind]:
                    model_py_path = dict_outputs[io_kind][s_output]
                    table_name = model_py_path.split('.')[-1]
                    obj_created = TableInputOutputInformation(
                        model_py_path=model_py_path,
                        table_key=s_output,
                        table_name=table_name)
                    dict_dict_dict_elm["dict_output"]["table"][
                        s_output] = obj_created
                    Logger.instance().debug("Object output table: " +
                                            s_output + " created.")

        for s_param in dict_params:
            obj_created = Option(name=s_param, value=dict_params[s_param])
            dict_dict_dict_elm["dict_params"][s_param] = obj_created
            Logger.instance().debug("Object option: " + s_param +
                                    " created.")

        # Instantiate the referred class
        wrapper_entry = self.create_tool_wrapper_inst(
            "rule_" + s_toolwrapper, s_toolwrapper, dict_dict_dict_elm,
            input_entry, output_entry)
        wrapper_entry.relation_toolwrapper_to_execution = execution
        Logger.instance().debug("Object tool_python_path: " +
                                s_toolwrapper + " created.")
        session.add(wrapper_entry)
        session.commit()
        session.rollback()
        TableInputOutputInformation.set_tables_properties(
            TableInputOutputInformation.get_execution_tables())
        # commit / rollback trick to clean the session
        # TODO LucG: ask Lionel whether he already had this problem of not
        # being able to run queries and inserts in the same session.
        session.commit()
        session.rollback()
        # This command will create all the triggers that will create timestamp after modification
        TableModificationTime.create_triggers()
        # This create_all will create all models that have been found in the tool_python_path
        SQLManager.instance().create_all()
        wrapper_entry.is_content_respected()
    except NoResultFound as e:
        session.rollback()
        raise WopMarsException(
            "Error while parsing the configuration file. The database has not been setUp Correctly.",
            str(e)) from e
def test_run(self):
    """
    Run three FooWrapper5 tool wrappers in three ToolWrapperThread and check
    the content of the FooBase table once every thread has been joined.

    The three wrappers are wired identically (one input file, one output
    file, one FooBase output table); only the rule name changes.
    """
    type_in = TypeInputOrOutput(is_input=True)
    type_out = TypeInputOrOutput(is_input=False)

    def make_wrapper(rule_name):
        # Each wrapper gets its own, freshly created, IO information objects.
        file_in = FileInputOutputInformation(
            file_key="input1",
            path="resource/input_files/input_file1.txt")
        file_in.relation_file_or_tableioinfo_to_typeio = type_in

        file_out = FileInputOutputInformation(
            file_key="output1",
            path="outdir/output_file1.txt")
        file_out.relation_file_or_tableioinfo_to_typeio = type_out

        table_out = TableInputOutputInformation(
            model_py_path="FooBase",
            table_key="FooBase",
            table_name="FooBase")
        table_out.set_table(FooBase)
        table_out.relation_file_or_tableioinfo_to_typeio = type_out

        # Only the epoch-millis half of the timestamp is stored.
        now_millis, _ = get_current_time()
        table_out.modification = TableModificationTime(
            mtime_epoch_millis=now_millis,
            table_name=table_out.table_name)

        wrapper = FooWrapper5(rule_name=rule_name)
        wrapper.relation_toolwrapper_to_fileioinfo.extend(
            [file_in, file_out])
        wrapper.relation_toolwrapper_to_tableioinfo.append(table_out)
        return wrapper

    # All wrappers are built before any thread object is created.
    wrappers = [make_wrapper(name) for name in ("rule1", "rule2", "rule3")]
    threads = [ToolWrapperThread(wrapper) for wrapper in wrappers]

    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    inserted_rows = SQLManager.instance().get_session().query(
        FooBase).filter(FooBase.name.like('Foowrapper5 - %')).all()
    self.assertEqual(len(inserted_rows), 3000)
def test_run_commit_vs_query(self):
    """
    Run three Add wrappers and three Query wrappers on the FooBase table in
    separate ToolWrapperThread.

    One Query thread is started before the Add threads; the two other Query
    threads are started 5 seconds after them. The test contains no
    assertion: it only starts and joins the threads.
    """
    # this test does not work with mysql and postgresql
    if SQLManager.instance().engine.url.drivername in ('mysql',
                                                       'postgresql'):
        return

    type_in = TypeInputOrOutput(is_input=True)
    type_out = TypeInputOrOutput(is_input=False)

    def make_foobase_table(type_io):
        table = TableInputOutputInformation(model_py_path="FooBase",
                                            table_key="FooBase",
                                            table_name="FooBase")
        table.set_table(FooBase)
        table.relation_file_or_tableioinfo_to_typeio = type_io
        return table

    def make_add_wrapper():
        file_in = FileInputOutputInformation(
            file_key="input1",
            path="resource/input_files/input_file1.txt")
        file_in.relation_file_or_tableioinfo_to_typeio = type_in

        table_out = make_foobase_table(type_out)
        # Only the epoch-millis half of the timestamp is stored.
        now_millis, _ = get_current_time()
        table_out.modification = TableModificationTime(
            mtime_epoch_millis=now_millis,
            table_name=table_out.table_name)

        rows_option = Option(name="rows", value="1000")

        wrapper = Add(rule_name="rule1")
        wrapper.relation_toolwrapper_to_fileioinfo.append(file_in)
        wrapper.relation_toolwrapper_to_tableioinfo.append(table_out)
        wrapper.relation_toolwrapper_to_option.append(rows_option)
        return wrapper

    def make_query_wrapper():
        table_in = make_foobase_table(type_in)
        wrapper = Query(rule_name="rule1")
        wrapper.relation_toolwrapper_to_tableioinfo.append(table_in)
        return wrapper

    # Same construction order as before: Add wrappers, their threads,
    # then Query wrappers and their threads.
    add_wrappers = [make_add_wrapper() for _ in range(3)]
    add_threads = [ToolWrapperThread(wrapper) for wrapper in add_wrappers]
    query_wrappers = [make_query_wrapper() for _ in range(3)]
    query_threads = [
        ToolWrapperThread(wrapper) for wrapper in query_wrappers
    ]

    early_query, *late_queries = query_threads

    early_query.start()
    for thread in add_threads:
        thread.start()
    time.sleep(5)
    for thread in late_queries:
        thread.start()

    for thread in add_threads + query_threads:
        thread.join()
def run(self):
    """
    Run the tool wrapper and fire events.

    Four outcomes are possible:

    - the thread is flagged dry (``self.__dry``): the wrapper is skipped and
      its execution status is set to "ALREADY_EXECUTED";
    - the ``--dry-run`` option is set: nothing is executed and the status is
      set to "DRY";
    - the ``--touch`` option is set: the parent directories of the output
      files are created, then the wrapper ``touch()`` method is called;
    - otherwise: the parent directories of the output files are created,
      then the wrapper ``run()`` method is called.

    In the last two cases the session is committed and the status is set to
    "EXECUTED". ``fire_success()`` is called when no exception was raised.

    :raises WopMarsException: if any exception is raised while handling the
        wrapper. The session is rolled back and the status set to "ERROR"
        before the exception is raised, chained to the original one.
    :return: None
    """
    wopmars_session = SQLManager.instance().get_session()
    _, start = get_current_time()
    tool_wrapper = self.__tool_wrapper
    try:
        tool_wrapper.session = wopmars_session
        if self.__dry:
            # The wrapper does not need to be executed: it is skipped, so
            # the same timestamp is recorded for both times.
            Logger.instance().info("ToolWrapper skipped: {} -> {}".format(
                tool_wrapper.rule_name, tool_wrapper.__class__.__name__))
            tool_wrapper.set_execution_infos(start, start,
                                             "ALREADY_EXECUTED")
        else:
            Logger.instance().info("\n" + str(tool_wrapper) + "\n" +
                                   "command line: \n\t" +
                                   self.get_command_line())
            if OptionManager.instance()["--dry-run"]:
                # Simulation only
                Logger.instance().debug(
                    "Dry-run mode enabled. Execution skipped.")
                tool_wrapper.set_execution_infos(status="DRY")
            else:
                Logger.instance().info("ToolWrapper: " +
                                       str(tool_wrapper.rule_name) +
                                       " -> " +
                                       tool_wrapper.__class__.__name__ +
                                       " started.")
                # Make sure the parent directory of every output file
                # exists before touching or running the wrapper.
                for out_field in tool_wrapper.specify_output_file():
                    out_file_path = tool_wrapper.output_file(out_field)
                    out_dir = os.path.dirname(out_file_path)
                    pathlib.Path(out_dir).mkdir(parents=True,
                                                exist_ok=True)

                if OptionManager.instance()["--touch"]:
                    # Touch the output files of the tool wrapper
                    tool_wrapper.touch()
                else:
                    # Normal run of the tool wrapper
                    tool_wrapper.run()

                wopmars_session.commit()
                _, finished = get_current_time()
                tool_wrapper.set_execution_infos(start, finished,
                                                 "EXECUTED")
    except Exception as e:
        wopmars_session.rollback()
        # Read the clock again so that the failure is not recorded with the
        # start time as its second timestamp.
        # NOTE(review): set_execution_infos is assumed to take
        # (start, end, status), as in the "EXECUTED" call above - confirm.
        _, failed_at = get_current_time()
        tool_wrapper.set_execution_infos(start, failed_at, "ERROR")
        raise WopMarsException(
            "Error while executing rule " + tool_wrapper.rule_name +
            " (ToolWrapper " + tool_wrapper.tool_python_path + ")",
            "Full stack trace: \n" + str(traceback.format_exc())) from e
    # todo twthread: close the session
    # session_tw.close()
    self.fire_success()