def is_options_respected(self):
    """
    Validate the options given in the definition file against ``specify_params``.

    ``specify_params`` (written by the toolwrapper developer) returns a dict
    mapping each option name to a "|"-separated string of allowed types, e.g.::

        {
            'option1': "int",
            'option2': "required|str",
        }

    Three checks are performed, in order:
      1. every user-supplied option name must be declared by the wrapper;
      2. every supplied value must match its declared type (``Option.correspond``);
      3. every option declared "required" must actually be supplied.

    :raises WopMarsException: if any of the three checks fails.
    """
    allowed_options = self.specify_params()
    supplied_names = {option.name for option in self.options}
    # 1. reject option names the wrapper does not declare
    if not supplied_names.issubset(allowed_options):
        raise WopMarsException(
            "The content of the definition file is not valid.",
            "The given option variable for the rule " + str(self.name) + " -> " +
            self.__class__.__name__ + " are not correct, they should be in: " +
            "\n\t'{0}'".format("'\n\t'".join(allowed_options)) + "\n" + "They are:" +
            "\n\t'{0}'".format("'\n\t'".join([opt.name for opt in self.options])))
    # 2. each option validates its own value against the declared type string
    for option in self.options:
        option.correspond(allowed_options[option.name])
    # 3. every option marked "required" must have been supplied by the user
    for option_name in allowed_options:
        is_required = "required" in str(allowed_options[option_name]).lower()
        if is_required and option_name not in supplied_names:
            raise WopMarsException(
                "The content of the definition file is not valid.",
                "The option '" + option_name + "' has not been provided but it is required.")
def is_output_respected(self):
    """
    Validate the output files and tables declared in the definition file.

    The declared output file names must exactly match the set returned by
    ``specify_output_file`` and the declared output table names must exactly
    match the set returned by ``specify_output_table`` (both methods are
    written by the toolwrapper developer).

    :raises WopMarsException: if the declared outputs do not match the
        wrapper's specification.
    """
    declared_file_names = {f_output.name for f_output in self.files
                           if f_output.type.name == "output"}
    if declared_file_names != set(self.specify_output_file()):
        raise WopMarsException(
            "The content of the definition file is not valid.",
            "The given output variable names for " + self.__class__.__name__ +
            " (rule " + str(self.name) + ")" + " are not correct, they should be: " +
            "\n\t'{0}'".format("'\n\t'".join(self.specify_output_file())) + "\n" +
            "They are:" +
            "\n\t'{0}'".format("'\n\t'".join(
                [f.name for f in self.files if f.type.name == "output"])))
    set_output_table = {t_output for t_output in self.tables
                        if t_output.type.name == "output"}
    set_output_table_names = {t.tablename for t in set_output_table}
    if set_output_table_names != set(self.specify_output_table()):
        raise WopMarsException(
            "The content of the definition file is not valid.",
            "The given output table variable names for " + self.__class__.__name__ +
            " (rule " + str(self.name) + ")" + " are not correct, they should be: " +
            "\n\t'{0}'".format("'\n\t'".join(self.specify_output_table())) + "\n" +
            "They are:" +
            "\n\t'{0}'".format("'\n\t'".join(set_output_table_names)))
    # NOTE(review): this per-table pass is redundant with the set equality
    # check just above; kept for behavioural parity with the original.
    for t_output in set_output_table:
        if t_output.tablename not in self.specify_output_table():
            raise WopMarsException(
                "The content of the definition file is not valid.",
                "The given output tablenames for " + self.__class__.__name__ +
                " (rule " + str(self.name) + ")" + " is not correct. it should be in: " +
                "\n\t'{0}'".format("'\n\t'".join(self.specify_output_table())) + "\n" +
                "It is:" + "\n\t'" + t_output.tablename)
def get_dag_to_exec(self):
    """
    Build ``self.__dag_to_exec`` according to --sourcerule / --targetrule.

    If ``--sourcerule`` is set, the executed DAG is restricted to that rule and
    all of its successors; if ``--targetrule`` is set, to that rule and all of
    its predecessors; otherwise the whole DAG is executed.  (Both options being
    set at once is ruled out by checks at the beginning of the software.)

    Tools that belong to the full DAG but not to the executed one get their
    status set to "NOT_PLANNED" in the database.

    :raises WopMarsException: if the requested rule name does not exist.
    """
    if OptionManager.instance()["--sourcerule"] is not None:
        try:
            # Get the rule asked by the user as 'sourcerule'
            node_from_rule = [n for n in self.__dag_tools
                              if n.name == OptionManager.instance()["--sourcerule"]][0]
        except IndexError:
            raise WopMarsException(
                "The given rule to start from: " +
                OptionManager.instance()["--sourcerule"] + " doesn't exist.")
        self.__dag_to_exec = DAG(self.__dag_tools.get_all_successors(node_from_rule))
        Logger.instance().info("Running the workflow from rule " +
                               str(OptionManager.instance()["--sourcerule"]) +
                               " -> " + node_from_rule.toolwrapper)
    elif OptionManager.instance()["--targetrule"] is not None:
        try:
            # Get the rule asked by the user as 'targetrule'
            node_from_rule = [n for n in self.__dag_tools
                              if n.name == OptionManager.instance()["--targetrule"]][0]
        except IndexError:
            raise WopMarsException(
                "The given rule to go to: " +
                OptionManager.instance()["--targetrule"] + " doesn't exist.")
        self.__dag_to_exec = DAG(self.__dag_tools.get_all_predecessors(node_from_rule))
        Logger.instance().info("Running the workflow to the rule " +
                               str(OptionManager.instance()["--targetrule"]) +
                               " -> " + node_from_rule.toolwrapper)
    else:
        # No restriction requested: execute the full DAG.
        self.__dag_to_exec = self.__dag_tools
    # Collect every table touched by the executed tools so IODbPut can set
    # their SQLAlchemy properties in one pass.
    # FIX: was a list comprehension used only for its extend() side effect.
    tables = []
    for tw in self.__dag_to_exec.nodes():
        tables.extend(tw.tables)
    IODbPut.set_tables_properties(tables)
    # For the tools that are in the workflow definition file but not in the
    # executed dag, their status is set to "NOT_PLANNED"
    for tw in set(self.__dag_tools.nodes()).difference(set(self.__dag_to_exec.nodes())):
        tw.set_execution_infos(status="NOT_PLANNED")
        self.__session.add(tw)
    self.__session.commit()
def load_definition_file(self, s_definition_file):
    """
    Open the definition file and load its content in a dictionary thanks to the
    ``yaml`` library.

    ``yaml`` can raise an exception if the yaml specifications are not respected
    or if there are duplicates at the same level of hierarchy in the definition
    file.  If so, the exception is caught then wrapped into a
    ``WopMarsException``.

    The check of the grammar of the definition file is done during this step
    but no tests are performed regarding the actual content of the definition
    file.

    :param s_definition_file: Path to the definition file
    :type s_definition_file: str
    :raises WopMarsException: The yaml specifications are not respected
    """
    # Tests about grammar and syntax are performed here (file's existence is also tested here)
    try:
        with open(s_definition_file, 'r') as def_file:
            s_def_file_content = def_file.read()
            try:
                # The workflow definition file is loaded as-it in memory by the pyyaml library
                Logger.instance().info("Reading the Wopfile: " + str(s_definition_file))
                # Replace jinja2 variables with environment variable values
                s_def_file_content = jinja2.Environment().from_string(s_def_file_content).render(os.environ)
                # Parse the file to find duplicates rule names (it is a double check with the following step)
                Reader.check_duplicate_rules(s_def_file_content)
                # Allows to raise an exception if duplicate keys are found on the same document hirearchy level.
                yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
                                     Reader.no_duplicates_constructor)
                # SECURITY NOTE(review): yaml.load without an explicit Loader uses the
                # full (unsafe) default loader.  It is kept because the duplicate-key
                # constructor above is registered on that default loader — do not feed
                # untrusted files to this method.
                # yaml.load returns None if there is no content in the String
                self.__dict_workflow_definition = yaml.load(s_def_file_content) or {}
                if self.__dict_workflow_definition == {}:
                    Logger.instance().warning("The workflow definition file is empty")
                Logger.instance().debug("\n" + DictUtils.pretty_repr(self.__dict_workflow_definition))
                Logger.instance().debug("Read complete.")
                Logger.instance().info("Checking whether the file is well formed...")
                # raise an exception if there is a problem with the grammar
                self.is_grammar_respected()
                Logger.instance().debug("File well formed.")
            # BUGFIX: ConstructorError is a subclass of yaml.YAMLError, so it must be
            # caught FIRST — in the previous order this handler was unreachable and
            # duplicate-key errors were reported as generic YAML errors.
            except ConstructorError as CE:
                raise WopMarsException("Error while parsing the configuration file: \n\t",
                                       str(CE))
            # YAMLError is thrown if the YAML specifications are not respected by the definition file
            except yaml.YAMLError as exc:
                raise WopMarsException("Error while parsing the configuration file: \n\t"
                                       "The YAML specification is not respected:", str(exc))
    except FileNotFoundError:
        raise WopMarsException("Error while parsing the configuration file: \n\tInput error:",
                               "The specified file at " + s_definition_file + " doesn't exist.")
def copy(src, dest):
    """
    Recursively copy *src* to *dest*, skipping ``__pycache__`` directories.

    Falls back to a plain file copy when *src* turns out to be a single file
    rather than a directory.

    :param src: path of the source file or directory
    :param dest: destination path
    :raises WopMarsException: if the copy fails for any other reason
    """
    try:
        shutil.copytree(src, dest, ignore=shutil.ignore_patterns('__pycache__'))
    except OSError as exc:
        # copytree refuses plain files; anything other than "not a directory"
        # is a genuine failure.
        if exc.errno != errno.ENOTDIR:
            raise WopMarsException("Error while building the example",
                                   'Directory not copied. Error: %s' % exc)
        shutil.copy(src, dest)
def set_args_date_and_size(self, type, dry=False):
    """
    WorkflowManager method:

    The date and the size of the files are set according to the actual date of
    last modification and size of the system files.

    The date of the tables are set according to the date of last modification
    notified in the modification_table table.

    If the type of IOPut is "output" and the execution is "not dry", the date
    in modification_table is set to the current time.time() datetime.

    # todo modify it to take commits into account isntead of the status of 'output' of a table

    :param type: "input" or "output"
    :type type: str
    :param dry: Say if the execution has been simulated.
    :type dry: bool
    """
    session = SQLManager.instance().get_session()
    # Refresh timestamp/size for every file going in the requested direction.
    for f in [f for f in self.files if f.type.name == type]:
        try:
            date = datetime.datetime.fromtimestamp(os.path.getmtime(
                f.path))
            size = os.path.getsize(f.path)
        except FileNotFoundError as FE:
            # todo (translated from French): without this rollback it bugs — why?
            # The session is empty... as if the query were blocking.
            if not OptionManager.instance()["--dry-run"]:
                session.rollback()
                raise WopMarsException(
                    "Error during the execution of the workflow",
                    "The " + type + " file " + str(f.path) + " of rule " +
                    str(self.name) + " doesn't exist")
            else:
                # in dry-run mode, input/output files might not exist
                date = None
                size = None
        f.used_at = date
        f.size = size
        session.add(f)
        if type == "input":
            Logger.instance().debug("Input file " + str(f) + " used.")
        elif type == "output" and dry:
            Logger.instance().debug(
                "Output file " + str(f) +
                " has been loaded from previous execution.")
        elif type == "output" and not dry:
            Logger.instance().debug("Output file " + str(f) + " has been created.")
    # this commit is due to a bug that i couldn't figure out: the session empty
    # itself between the two loops... this is not good at all since it may lead
    # to inconsistence in the database
    session.commit()
    # Tables take their timestamp from their modification_table entry rather
    # than from the filesystem.
    for t in [t for t in self.tables if t.type.name == type]:
        t.used_at = t.modification.date
        session.add(t)
    session.commit()
def result_factory(self, query, method):
    """
    Return the result of *query*, executed with the demanded *method*.

    The result_factory wraps the query-terminating methods of SQLAlchemy
    (all / first / one / one_or_none / scalar / count) so that they run
    under WopMars' read-lock instead of relying on SQLite's own locking.

    :param query: The query object, ready to be performed.
    :param method: String naming the SQLAlchemy method used to execute the query.
    :return: The result of the query.
    :raises WopMarsException: if *method* is not one of the supported names.
    """
    result = None
    try:
        Logger.instance().debug("Executing query on session " + str(query.session) +
                                ": \n" + str(query) + ";")
        Logger.instance().debug("WopMarsQuery " + str(query.session) +
                                " want the read-lock on SQLManager")
        self.__lock.acquire_read()
        Logger.instance().debug(
            "\"" + str(query.session) + "\" has taken the read lock on SQLManager.")
        # Dispatch on the superclass (plain sqlalchemy Query) implementation so
        # WopMars' own overrides are bypassed while we hold the lock.
        base_query = super(query.__class__, query)
        dispatch = {
            "all": base_query.all,
            "one": base_query.one,
            "first": base_query.first,
            "count": base_query.count,
            "one_or_none": base_query.one_or_none,
            "scalar": base_query.scalar,
        }
        runner = dispatch.get(method)
        if runner is None:
            raise WopMarsException(
                "Error while querying the database.",
                "Demanded operation doesn't exist: " + str(method))
        result = runner()
    finally:
        # Always release the lock
        self.__lock.release()
        Logger.instance().debug(
            "\"" + str(query.session) + "\" has released the read lock on SQLManager.")
    return result
def correspond(self, carac):
    """
    Check that this option's value matches the type string declared by the
    tool wrapper.  Throws a WopMarsException if not.

    :param carac: declaration string in the format "carac1|carac2|carc3",
        mixing castable type names with the "default"/"required" markers.
    :raise WopMarsException: if the value cannot be cast to a declared type,
        or if the declaration contains an unknown token.
    """
    for raw_token in carac.split("|"):
        token = raw_token.strip().lower()
        if token in Option.static_option_castable:
            # NOTE: eval() only ever resolves names whitelisted in
            # Option.static_option_castable (e.g. "int", "str") to the builtin,
            # which is then tried as a cast on the option value.
            try:
                eval(token)(self.value)
            except ValueError:
                # the cast failed: the declared type is not respected
                raise WopMarsException(
                    "The content of the definition file is not valid.",
                    "The given option value of " + str(self.name) +
                    " should be of type " + token)
        elif token != Option.static_option_default and token != Option.static_option_req:
            # neither a castable type nor the "default"/"required" markers:
            # the toolwrapper's specify_params declaration itself is broken
            raise WopMarsException(
                "Malformed toolwrapper class.",
                "The toolwrapper " + str(self.rule.toolwrapper) + " of the rule " +
                str(self.rule.name) + " has an incorrect \"specify_params\" method wich is " +
                "associating the " + self.name + " option with an unknown type. " +
                "Found: " + raw_token + " - Allowed: " +
                str(",".join(Option.static_option_castable)))
def output_file(self, key):
    """
    Return the path of the output file registered under *key*.

    :param key: String: the name of the variable containing the path.
    :return: the path of the matching output file
    :raises WopMarsException: if no output file was declared under *key*.
    """
    for f_out in self.files:
        if f_out.name == key and f_out.type.name == "output":
            return f_out.path
    raise WopMarsException(
        "Error during the execution of the ToolWrapper " +
        str(self.toolwrapper) + " (rule " + self.name + ").",
        "The output file " + str(key) + " has not been specified.")
def output_table(self, key):
    """
    Return the output table model object registered under the given name.

    :param key: String: the name of the Table object.
    :return: the model returned by the matching table's ``get_table()``
    :raises WopMarsException: if no output table was declared under *key*.
    """
    for t_out in self.tables:
        if t_out.tablename == key and t_out.type.name == "output":
            return t_out.get_table()
    raise WopMarsException(
        "Error during the execution of the ToolWrapper " +
        str(self.toolwrapper) + " (rule " + self.name + ").",
        "The output table " + str(key) + " has not been specified.")
def check_duplicate_rules(s_workflow_file):
    """
    Raise if the workflow definition file declares the same rule name twice.

    The workflow definition file should contain rules with different names —
    it is therefore recommended to name rules after their functionality rather
    than after the tool.  Rule names are extracted with a regex on the raw
    text, so this is a cheap first pass; the yaml duplicate-key constructor
    performs a second check later.

    :param s_workflow_file: The content of the definition file
    :type s_workflow_file: str
    :raises WopMarsException: There is a duplicate rule name
    """
    Logger.instance().debug("Looking for duplicate rules...")
    already_declared = set()
    # every "rule <name>:" occurrence, in file order
    for rule_name in re.findall(r'rule (.+?):', str(s_workflow_file)):
        if rule_name in already_declared:
            # There is a duplicate rule name
            raise WopMarsException("Error while parsing the configuration file:\n\t",
                                   "The rule " + rule_name + " is duplicated.")
        already_declared.add(rule_name)
    Logger.instance().debug("No Duplicate.")
def parse(self):
    """
    Organize the parsing of the Workflow Definition File, or of a single tool
    when the ``tool`` command is used.

    Delegates to the reader's ``read()`` (wopfile mode) or
    ``load_one_toolwrapper()`` (tool mode) to insert the workflow objects in
    the database, then rebuilds the DAG from the stored toolwrappers and
    verifies it is actually acyclic.  If ``--dot`` is set, the dot and ps
    files are written here.

    :raise WopMarsException: if the workflow is not a DAG.
    :return: the DAG
    """
    options = OptionManager.instance()
    if options["tool"]:
        # single-tool mode: build a one-rule workflow from the CLI arguments
        self.__reader.load_one_toolwrapper(options["TOOLWRAPPER"],
                                           options["--input"],
                                           options["--output"],
                                           options["--params"])
    else:
        self.__reader.read(options["--wopfile"])
    # Get back the set of toolwrappers of the workflow before executing them.
    set_toolwrappers = self.get_set_toolwrappers()
    dag_tools = DAG(set_toolwrappers)
    if not is_directed_acyclic_graph(dag_tools):
        # todo find out the loop to specify it in the error message
        raise WopMarsException(
            "Error while parsing the configuration file: \n\tThe workflow is malformed:",
            "The specified Workflow cannot be represented as a DAG.")
    s_dot_option = options["--dot"]
    if s_dot_option:
        Logger.instance().info(
            "Writing the dot and ps files representing the workflow at " +
            str(s_dot_option))
        dag_tools.write_dot(s_dot_option)
        Logger.instance().debug("Dot and ps file wrote.")
    return dag_tools
def log(self, level, msg):
    """
    Log *msg* through the dedicated toolwrapper logger at the given level.

    Used by the toolwrapper developer in order to have a dedicated logger.

    :param level: The level of logging you need: "debug", "info", "warning", "error"
    :type level: str
    :param msg: The actual string to log.
    :type msg: str
    :raises WopMarsException: if *level* is not one of the four accepted names.
    """
    if level == "debug":
        Logger.instance().toolwrapper_debug(msg, self.toolwrapper)
    elif level == "info":
        Logger.instance().toolwrapper_info(msg, self.toolwrapper)
    elif level == "warning":
        # BUGFIX: previously delegated to toolwrapper_debug, silently demoting
        # "warning" messages to debug level.
        # NOTE(review): assumes Logger exposes toolwrapper_warning — confirm.
        Logger.instance().toolwrapper_warning(msg, self.toolwrapper)
    elif level == "error":
        Logger.instance().toolwrapper_error(msg, self.toolwrapper)
    else:
        raise WopMarsException(
            "Error in the Toolwrapper definition of method run()",
            "There is no logging level associated with " + str(level) + ". " +
            "The authorized ones are: debug, info, warning, error")
def load_one_toolwrapper(self, s_toolwrapper, s_dict_inputs, s_dict_outputs, s_dict_params):
    """
    Method called when the ``tool`` command is used. It is equivalent to the
    :meth:`~.wopmars.framework.parsing.Reader.Reader.read` method but creates
    a workflow with only one toolwrapper. The workflow is also stored inside
    the database.

    :param s_toolwrapper: The name of the toolwrapper (will be imported)
    :type s_toolwrapper: str
    :param s_dict_inputs: A string containing the dict of input files
    :type s_dict_inputs: str
    :param s_dict_outputs: A string containing the dict of output files
    :type s_dict_outputs: str
    :param s_dict_params: A string containing the dict of params
    :type s_dict_params: str
    :raise WopMarsException: There is an error while accessing the database
    """
    session = SQLManager.instance().get_session()
    # SECURITY NOTE(review): eval() on the raw command-line strings — only safe
    # because these come from the local user's own CLI invocation.
    dict_inputs = dict(eval(s_dict_inputs))
    dict_outputs = dict(eval(s_dict_outputs))
    dict_params = dict(eval(s_dict_params))
    try:
        # The same execution entry for the whole workflow-related database entries.
        execution = Execution(started_at=datetime.datetime.fromtimestamp(time.time()))
        # get the types that should have been created previously
        input_entry = session.query(Type).filter(Type.name == "input").one()
        output_entry = session.query(Type).filter(Type.name == "output").one()
        Logger.instance().debug("Loading unique toolwrapper " + s_toolwrapper)
        # dict_dict_dict_elm maps "dict_input"/"dict_output" -> "file"/"table"
        # -> name -> ORM object, plus "dict_params" -> name -> Option.
        dict_dict_dict_elm = dict(dict_input={"file": {}, "table": {}},
                                  dict_params={},
                                  dict_output={"file": {}, "table": {}})
        for type in dict_inputs:
            if type == "file":
                for s_input in dict_inputs[type]:
                    # input file paths are made absolute relative to --directory
                    obj_created = IOFilePut(name=s_input,
                                            path=os.path.abspath(os.path.join(
                                                OptionManager.instance()["--directory"],
                                                dict_inputs[type][s_input])))
                    dict_dict_dict_elm["dict_input"][type][s_input] = obj_created
                    Logger.instance().debug("Object input file: " + s_input + " created.")
            elif type == "table":
                for s_input in dict_inputs[type]:
                    obj_created = IODbPut(model=dict_inputs[type][s_input],
                                          tablename=s_input)
                    dict_dict_dict_elm["dict_input"][type][s_input] = obj_created
                    Logger.instance().debug("Object input table: " + s_input + " created.")
        for type in dict_outputs:
            if type == "file":
                for s_output in dict_outputs[type]:
                    obj_created = IOFilePut(name=s_output,
                                            path=os.path.abspath(os.path.join(
                                                OptionManager.instance()["--directory"],
                                                dict_outputs[type][s_output])))
                    dict_dict_dict_elm["dict_output"]["file"][s_output] = obj_created
                    Logger.instance().debug("Object output file: " + s_output + " created.")
            elif type == "table":
                for s_output in dict_outputs[type]:
                    obj_created = IODbPut(model=dict_outputs[type][s_output],
                                          tablename=s_output)
                    dict_dict_dict_elm["dict_output"]["table"][s_output] = obj_created
                    Logger.instance().debug("Object output table: " + s_output + " created.")
        for s_param in dict_params:
            obj_created = Option(name=s_param, value=dict_params[s_param])
            dict_dict_dict_elm["dict_params"][s_param] = obj_created
            Logger.instance().debug("Object option: " + s_param + " created.")
        # Instantiate the refered class
        wrapper_entry = self.create_toolwrapper_entry("rule_" + s_toolwrapper,
                                                      s_toolwrapper,
                                                      dict_dict_dict_elm,
                                                      input_entry, output_entry)
        wrapper_entry.execution = execution
        Logger.instance().debug("Object toolwrapper: " + s_toolwrapper + " created.")
        session.add(wrapper_entry)
        session.commit()
        session.rollback()
        IODbPut.set_tables_properties(IODbPut.get_execution_tables())
        # commit /rollback trick to clean the session
        # todo (translated from French): ask lionel — have you ever had this problem
        # of not being able to run queries and inserts in the same session?
        session.commit()
        session.rollback()
        # This create_all will create all tables that have been found in the toolwrapper
        # if not SQLManager.instance().d_database_config['db_connection'] == 'postgresql':
        # TODO: this function is not creating the triggers after the table in postgresql so I switched it off
        IODbPut.create_triggers()
        SQLManager.instance().create_all()
        wrapper_entry.is_content_respected()
    except NoResultFound as e:
        session.rollback()
        raise WopMarsException("Error while parsing the configuration file. The database has not been setUp Correctly.",
                               str(e))
def read(self, s_definition_file):
    """
    Reads the file given and insert the rules of the workflow in the database.

    The definition file is supposed to be properly formed. The validation of
    the content of the definition is done during the instanciation of the
    tools.

    :param: s_definition_file: String containing the path to the definition file.
    :type s_definition_file: str
    :raise: WopmarsException: The content is not validated
    """
    self.load_definition_file(s_definition_file)
    session = SQLManager.instance().get_session()
    # The dict_workflow_definition is assumed to be well formed
    try:
        # The same execution entry for the whole workflow-related database entries.
        execution = Execution(started_at=datetime.datetime.fromtimestamp(time.time()))
        # get the types database entries that should have been created previously
        input_entry = session.query(Type).filter(Type.name == "input").one()
        output_entry = session.query(Type).filter(Type.name == "output").one()
        set_wrapper = set()
        # Encounter a rule block
        for rule in self.__dict_workflow_definition:
            str_wrapper_name = None
            # the name of the rule is extracted after the "rule" keyword. There
            # shouldn't be a ":" but it costs nothing.
            str_rule_name = rule.split()[-1].strip(":")
            Logger.instance().debug("Encounter rule " + str_rule_name + ": \n" +
                                    str(DictUtils.pretty_repr(self.__dict_workflow_definition[rule])))
            # The dict of "input"s, "output"s and "params" is re-initialized for each wrapper
            dict_dict_dict_elm = dict(dict_input={"file": {}, "table": {}},
                                      dict_params={},
                                      dict_output={"file": {}, "table": {}})
            for key_second_step in self.__dict_workflow_definition[rule]:
                # key_second_step is supposed to be "tool", "input", "output" or "params"
                if type(self.__dict_workflow_definition[rule][key_second_step]) == dict:
                    # if it is a dict, then inputs, outputs or params are coming
                    for key_third_step in self.__dict_workflow_definition[rule][key_second_step]:
                        # todo tabling modification of the indentation levels + appearance of tables in file
                        if key_second_step == "params":
                            key = key_third_step
                            value = self.__dict_workflow_definition[rule][key_second_step][key_third_step]
                            obj_created = Option(name=key, value=value)
                            dict_dict_dict_elm["dict_params"][key] = obj_created
                        else:
                            # key_third_step is "file" or "table"; key_fourth_step
                            # is the variable name declared by the user
                            for key_fourth_step in self.__dict_workflow_definition[rule][key_second_step][key_third_step]:
                                obj_created = None
                                if key_third_step == "file":
                                    key = key_fourth_step
                                    str_path_to_file = os.path.join(
                                        OptionManager.instance()["--directory"],
                                        self.__dict_workflow_definition[rule][
                                            key_second_step][
                                            key_third_step][
                                            key])
                                    obj_created = IOFilePut(name=key,
                                                            path=os.path.abspath(str_path_to_file))
                                elif key_third_step == "table":
                                    key = key_fourth_step
                                    modelname = self.__dict_workflow_definition[rule][
                                        key_second_step][
                                        key_third_step][
                                        key]
                                    obj_created = IODbPut(model=modelname, tablename=key)
                                    # NOTE(review): immediately overwritten by the
                                    # assignment below — looks redundant, kept as-is.
                                    dict_dict_dict_elm["dict_" + key_second_step][
                                        key_third_step][
                                        key] = self.__dict_workflow_definition[rule][key_second_step][key_third_step][key]
                                # all elements of the current rule block are stored in there
                                # key_second_step is input or output here
                                dict_dict_dict_elm["dict_" + key_second_step][key_third_step][key] = obj_created
                                Logger.instance().debug("Object " + key_second_step + " " +
                                                        key_third_step + ": " + key + " created.")
                else:
                    # if the step is not a dict, then it is supposed to be the "tool" line
                    str_wrapper_name = self.__dict_workflow_definition[rule][key_second_step]
            # At this point, "dict_dict_dict_elm" is like this:
            # {
            #     'dict_params': {
            #         'option1': Option('option1', 'valueofoption1')
            #     },
            #     'dict_input': {
            #         'file' : {
            #             'input1': IOFilePut('input1', 'path/to/input1')
            #         }
            #         'table': {
            #             'table1': IODbPut('table1', 'package.of.table1')
            #         }
            #     },
            # }
            # Instantiate the refered class and add it to the set of objects
            wrapper_entry = self.create_toolwrapper_entry(str_rule_name, str_wrapper_name,
                                                          dict_dict_dict_elm,
                                                          input_entry, output_entry)
            # Associating a toolwrapper to an execution
            wrapper_entry.execution = execution
            set_wrapper.add(wrapper_entry)
            Logger.instance().debug("Object toolwrapper: " + str_wrapper_name + " created.")
            # commit/rollback trick to clean the session - SQLAchemy bug suspected
            session.commit()
            session.rollback()
            # todo set_table_properties outside the rules loop to take into account all the tables at once
            # (error if one tool has a foreign key refering to a table that is not in its I/O put
        IODbPut.set_tables_properties(IODbPut.get_execution_tables())
        session.commit()
        session.rollback()
        # This create_all will create all tables that have been found in the toolwrapper
        # if not SQLManager.instance().d_database_config['db_connection'] == 'postgresql':
        # TODO: this function is not creating the triggers after the table in postgresql so I switched it off
        IODbPut.create_triggers()
        SQLManager.instance().create_all()
        session.add_all(set_wrapper)
        # save all operations done so far.
        session.commit()
        for tw in set_wrapper:
            tw.is_content_respected()
    except NoResultFound as e:
        session.rollback()
        raise WopMarsException("Error while parsing the configuration file. The database has not been setUp Correctly.",
                               str(e))
def create_toolwrapper_entry(self, str_rule_name, str_wrapper_name, dict_dict_dict_elm, input_entry, output_entry):
    """
    Actual creating of the Toolwrapper object.

    The toolwrapper object is an entry of the table rule in the resulting
    database.

    If the scoped_session has current modification, they probably will be
    commited during this method: tables are created and this can only be done
    with a clean session.

    :param str_rule_name: Contains the name of the rule in which the toolwrapper will be used.
    :type str_rule_name: str
    :param str_wrapper_name: Contains the name of the toolwrapper. It will be used for importing the correct module
        and then for creating the class
    :type str_wrapper_name: str
    :param dict_dict_dict_elm: "input"s "output"s and "params" and will be used to make relations between options /
        input / output and the toolwrapper.
    :type dict_dict_dict_elm: dict(dict(dict()))
    :param input_entry: input entry
    :type input_entry: :class:`wopmars.framework.bdd.tables.Type.Type`
    :param output_entry: output entry
    :type output_entry: :class:`wopmars.framework.bdd.tables.Type.Type`
    :return: TooLWrapper instance
    """
    session = SQLManager.instance().get_session()
    # Importing the module in the mod variable
    try:
        mod = importlib.import_module(str_wrapper_name)
        # Building the class object: the class is expected to carry the same
        # name as the last component of its dotted module path
        toolwrapper_class = eval("mod." + str_wrapper_name.split('.')[-1])
    except AttributeError:
        raise WopMarsException("Error while parsing the configuration file: \n\t",
                               "The class " + str_wrapper_name + " doesn't exist.")
    except ImportError as IE:
        # distinguish "module not found" from an ImportError raised inside the module
        if str_wrapper_name in str(IE):
            raise WopMarsException("Error while parsing the configuration file:",
                                   str_wrapper_name + " module is not in the pythonpath. ")
        else:
            raise WopMarsException("Error while parsing the configuration file:",
                                   str_wrapper_name + " module contains an ImportError: " + str(IE))
    # Initialize the instance of ToolWrapper
    toolwrapper_wrapper = toolwrapper_class(rule_name=str_rule_name)
    # associating ToolWrapper instances with their files / tables
    for elm in dict_dict_dict_elm["dict_input"]:
        if elm == "file":
            for input_f in dict_dict_dict_elm["dict_input"][elm]:
                # set the type of IOFilePut object
                iofileput_entry = dict_dict_dict_elm["dict_input"][elm][input_f]
                iofileput_entry.type = input_entry
                try:
                    # associating file and toolwrapper
                    toolwrapper_wrapper.files.append(iofileput_entry)
                except ObjectDeletedError as e:
                    raise WopMarsException("Error in the toolwrapper class declaration. Please, notice the developer",
                                           "The error is probably caused by the lack of the 'polymorphic_identity' attribute"
                                           " in the toolwrapper. Error message: \n" + str(e))
        elif elm == "table":
            for input_t in dict_dict_dict_elm["dict_input"][elm]:
                # input_t is the name of the table (not the model)
                # this is a preventing commit because next statement will create a new table and the session has to
                # be clean. I think it is a bug in SQLAlchemy which not allows queries then insert statements in
                # the same session
                session.commit()
                iodbput_entry = dict_dict_dict_elm["dict_input"][elm][input_t]
                # the user-side tables are created during the reading of the definition file
                # table_entry = IODbPut(name=dict_dict_dict_elm["dict_input"][elm][input_t], tablename=input_t)
                # insert in the database the date of last modification of a developper-side table
                modification_table_entry, created = session.get_or_create(
                    ModificationTable,
                    defaults={"date": datetime.datetime.fromtimestamp(time.time())},
                    table_name=input_t)
                iodbput_entry.modification = modification_table_entry
                iodbput_entry.type = input_entry
                try:
                    toolwrapper_wrapper.tables.append(iodbput_entry)
                except ObjectDeletedError as e:
                    raise WopMarsException("Error in the toolwrapper class declaration. Please, notice the developer",
                                           "The error is probably caused by the lack of the 'polymorphic_identity' attribute"
                                           " in the toolwrapper. Error message: \n" + str(e))
    for elm in dict_dict_dict_elm["dict_output"]:
        if elm == "file":
            for output_f in dict_dict_dict_elm["dict_output"][elm]:
                # set the type of IOFilePut object
                iofileput_entry = dict_dict_dict_elm["dict_output"][elm][output_f]
                iofileput_entry.type = output_entry
                try:
                    toolwrapper_wrapper.files.append(iofileput_entry)
                except ObjectDeletedError as e:
                    raise WopMarsException("Error in the toolwrapper class declaration. Please, notice the developer",
                                           "The error is probably caused by the lack of the 'polymorphic_identity' attribute"
                                           " in the toolwrapper. Error message: \n" + str(e))
        elif elm == "table":
            for output_t in dict_dict_dict_elm["dict_output"][elm]:
                # output_t is the table name (not the model)
                # preventive commit, same SQLAlchemy session issue as for inputs above
                session.commit()
                iodbput_entry = dict_dict_dict_elm["dict_output"][elm][output_t]
                modification_table_entry, created = session.get_or_create(
                    ModificationTable,
                    defaults={"date": datetime.datetime.fromtimestamp(time.time())},
                    table_name=output_t)
                iodbput_entry.modification = modification_table_entry
                iodbput_entry.type = output_entry
                try:
                    toolwrapper_wrapper.tables.append(iodbput_entry)
                except ObjectDeletedError as e:
                    raise WopMarsException(
                        "Error in the toolwrapper class declaration. Please, notice the developer",
                        "The error is probably caused by the lack of the 'polymorphic_identity' attribute"
                        " in the toolwrapper. Error message: \n" + str(e))
    for opt in dict_dict_dict_elm["dict_params"]:
        # associating option and toolwrapper
        toolwrapper_wrapper.options.append(dict_dict_dict_elm["dict_params"][opt])
    # toolwrapper_wrapper.is_content_respected()
    return toolwrapper_wrapper
def run(self):
    """
    Execute the toolwrapper of this thread and fire events to observers.

    Depending on the ``--dry-run`` option and on the ``__dry`` flag, the
    execution is either performed, simulated, or skipped entirely.
    Execution information (start date, finish date and status) is recorded
    on the toolwrapper in every case.

    :raises WopMarsException: if the toolwrapper raises during execution;
        the session is rolled back and the status is set to EXECUTION_ERROR.
    :return: None
    """
    session_tw = SQLManager.instance().get_session()
    start = datetime.datetime.fromtimestamp(time.time())
    try:
        self.__toolwrapper.set_session(session_tw)
        # if the tool needs to be executed because its output doesn't exist
        if not self.__dry:
            Logger.instance().info("\n" + str(self.__toolwrapper) + "\n" +
                                   "command line: \n\t" +
                                   self.get_command_line())
            # if you shouldn't simulate
            if not OptionManager.instance()["--dry-run"]:
                Logger.instance().info(
                    "Rule: " + str(self.__toolwrapper.name) + " -> " +
                    self.__toolwrapper.__class__.__name__ + " started.")
                # mkdir -p every output directory: the tool expects them to
                # exist before it runs
                for out_field in self.__toolwrapper.specify_output_file():
                    out_dir = os.path.dirname(
                        self.__toolwrapper.output_file(out_field))
                    # exist_ok=True replaces the old errno.EEXIST dance
                    os.makedirs(out_dir, exist_ok=True)
                self.__toolwrapper.run()
                session_tw.commit()
                self.__toolwrapper.set_execution_infos(
                    start, datetime.datetime.fromtimestamp(time.time()),
                    "EXECUTED")
            else:
                Logger.instance().debug(
                    "Dry-run mode enabled. Execution skipped.")
                self.__toolwrapper.set_execution_infos(status="DRY")
        else:
            # outputs are already up to date: skip the actual execution
            Logger.instance().info(
                "Rule: " + str(self.__toolwrapper.name) + " -> " +
                self.__toolwrapper.__class__.__name__ + " skipped.")
            self.__toolwrapper.set_execution_infos(
                start, datetime.datetime.fromtimestamp(time.time()),
                "ALREADY_EXECUTED")
    except Exception:
        session_tw.rollback()
        self.__toolwrapper.set_execution_infos(
            start, datetime.datetime.fromtimestamp(time.time()),
            "EXECUTION_ERROR")
        raise WopMarsException(
            "Error while executing rule " + self.__toolwrapper.name +
            " (ToolWrapper " + self.__toolwrapper.toolwrapper + ")",
            "Full stack trace: \n" + str(traceback.format_exc()))
    finally:
        # todo twthread: close the session here
        # session_tw.close()
        pass
    self.fire_success()
def run_queue(self):
    """
    Call start() method of all elements of the queue.

    The tools inside the queue are taken, then their inputs are checked. If
    they are ready, the tools are started. If not, they are put in a buffer
    list of "not ready tools" or "ready but has not necessary ressources
    available tools".

    The start method is called with a dry argument: if it appears that the
    input of the ToolWrapper are the same than in a previous execution, and
    that the output are already ready, the dry parameter is set to True and
    the start method will only simulate the execution.

    After that, the code checks the state of the workflow and gathers the
    informations to see if the workflow is finished, if it encountered an
    error or if it is currently running.

    :raises WopMarsException: The workflow encounter a problem and must stop.
    """
    #
    # # TODO THIS METHOD IS NOT THREAD-SAFE (maybe it actually is — to be checked)
    #
    # If no tools have been added to the queue:
    #  - All tools have been executed and the queue is empty, so nothing happens
    #  - There were remaining tools in the queue but they weren't ready, so they are tested again
    while not self.__queue_exec.empty():
        Logger.instance().debug("Queue size: " + str(self.__queue_exec.qsize()))
        Logger.instance().debug("Queue content: " + str(["rule: " + tt.get_toolwrapper().name +
                                                         "->" + tt.get_toolwrapper().toolwrapper
                                                         for tt in self.__queue_exec.get_queue_tuple()]))
        # get the first element of the queue to execute
        thread_tw = self.__queue_exec.get()
        tw = thread_tw.get_toolwrapper()
        Logger.instance().debug("Current rule: " + tw.name + "->" + tw.toolwrapper)
        # check if the predecessors of a rule have already been executed: a rule shouldn't be
        # executed if its predecessors have not been executed yet
        if not self.all_predecessors_have_run(tw):
            Logger.instance().debug("Predecessors of rule: " + tw.name + " have not been executed yet.")
        # for running, either the inputs have to be ready or the dry-run mode is enabled
        elif tw.are_inputs_ready() or OptionManager.instance()["--dry-run"]:
            # the state of inputs (table and file) are set in the db here.
            tw.set_args_date_and_size("input")
            Logger.instance().debug("ToolWrapper ready: " + tw.toolwrapper)
            dry = False
            # if the forceall option is set, the tool is re-executed anyway;
            # otherwise, check if the actual execution of the toolwrapper is necessary:
            # every predecessor of the toolwrapper has to be executed (or simulated)
            if not OptionManager.instance()["--forceall"] and \
                    self.is_this_tool_already_done(tw) and \
                    not bool([node for node in self.__dag_to_exec.predecessors(tw)
                              if node.status != "EXECUTED" and node.status != "ALREADY_EXECUTED"]):
                Logger.instance().info("Rule: " + tw.name + " -> " + tw.toolwrapper +
                                       " seemed to have already" +
                                       " been runned with same" +
                                       " parameters.")
                dry = True
            # todo twthread: check that the needed resources are available
            thread_tw.subscribe(self)
            self.__count_exec += 1
            # todo twthread: start method
            thread_tw.set_dry(dry)
            try:
                # be careful here: the execution of the toolthreads is recursive, meaning that calls
                # to functions may be stacked (run -> notify success -> run(next tool) ->
                # notify success(next tool) -> etc...)
                # todo twthread: start method
                thread_tw.run()
            except Exception as e:
                # as mentioned above, there may be recursive calls to this function, so every
                # exception can pass here multiple times: this attribute is used for recognizing
                # exceptions that have already been caught
                if not hasattr(e, "teb_already_seen"):
                    setattr(e, "teb_already_seen", True)
                    tw.set_execution_infos(status="EXECUTION_ERROR")
                    self.__session.add(tw)
                    self.__session.commit()
                raise e
        else:
            Logger.instance().debug("ToolWrapper not ready: rule: " + tw.name + " -> " + str(tw.toolwrapper))
            # the buffer contains the ToolWrappers that have inputs which are not ready yet
            self.__list_queue_buffer.append(thread_tw)
    Logger.instance().debug("Buffer: " + str(["rule: " + t.get_toolwrapper().name + "->" +
                                              t.get_toolwrapper().toolwrapper
                                              for t in self.__list_queue_buffer]))
    Logger.instance().debug("Running rules: " + str(self.__count_exec))
    # There is no more ToolWrapper waiting to be executed.
    # Are some tools currently being executed?
    if self.__count_exec == 0:
        # Were some tools not ready?
        if len(self.__list_queue_buffer) == 0:
            # If there is no tool waiting and no tool being executed, the workflow has finished.
            finished_at = datetime.datetime.fromtimestamp(time.time())
            Logger.instance().info("The workflow has completed. Finished at: " + str(finished_at))
            self.set_finishing_informations(finished_at, "FINISHED")
            SQLManager.instance().get_session().close()
            sys.exit(0)
        # only relevant in a multithreaded environment
        elif not self.check_buffer():
            # If no tool is being executed but some are still waiting for something,
            # the workflow has an issue
            finished_at = datetime.datetime.fromtimestamp(time.time())
            self.set_finishing_informations(finished_at, "ERROR")
            raise WopMarsException("The workflow has failed.",
                                  "The inputs are not ready for the remaining tools: " +
                                  ", \n".join([t.get_toolwrapper().toolwrapper + " -> rule: " +
                                               t.get_toolwrapper().name
                                               for t in self.__list_queue_buffer]) + ". ")
def is_input_respected(self):
    """
    Parsing method: check that the input file and table variable names
    given for this rule match what the toolwrapper developer declared.

    The expected names come from
    :meth:`~.wopmars.framework.database.ToolWrapper.ToolWrapper.specify_input_file`
    and :meth:`specify_input_table`, both written by the toolwrapper
    developer.

    :raise WopMarsException: The input are not respected by the user.
    """
    given_file_names = {f_input.name for f_input in self.files
                        if f_input.type.name == "input"}
    # the file variable names of the rule must exactly match the specification
    if given_file_names != set(self.specify_input_file()):
        raise WopMarsException(
            "The content of the definition file is not valid.",
            "The given input file variable names for " +
            self.__class__.__name__ + " (rule " + str(self.name) + ")" +
            " are not correct, they should be: " +
            "\n\t'{0}'".format("'\n\t'".join(self.specify_input_file())) +
            "\n" + "They are:" +
            "\n\t'{0}'".format("'\n\t'".join(given_file_names)))

    input_tables = {t_input for t_input in self.tables
                    if t_input.type.name == "input"}
    given_table_names = {tbl.tablename for tbl in input_tables}
    # the table variable names of the rule must exactly match the specification
    # this condition may be a duplicate... # todo to fix?
    if given_table_names != set(self.specify_input_table()):
        raise WopMarsException(
            "The content of the definition file is not valid.",
            "The given input table variable names for " +
            self.__class__.__name__ + " (rule " + str(self.name) + ")" +
            " are not correct, they should be: " +
            "\n\t'{0}'".format("'\n\t'".join(self.specify_input_table())) +
            "\n" + "They are:" +
            "\n\t'{0}'".format("'\n\t'".join(given_table_names)))

    for table in input_tables:
        # name as written in the definition file
        rule_side_name = table.tablename
        if rule_side_name not in self.specify_input_table():
            raise WopMarsException(
                "The content of the definition file is not valid.",
                "The given input tablenames for " + self.__class__.__name__ +
                " (rule " + str(self.name) + ")" +
                " is not correct. it should be in: " +
                "\n\t'{0}'".format("'\n\t'".join(self.specify_input_table())) +
                "\n" + "It is:" + "\n\t'" + rule_side_name)
        # name declared on the SQLAlchemy model itself
        model_side_name = table.get_table().__tablename__
        if model_side_name not in self.specify_input_table():
            raise WopMarsException(
                "The content of the definition file is not valid.",
                "The given tablename of model for " + self.__class__.__name__ +
                " (rule " + str(self.name) + ")" +
                " is not correct. it should be in: " +
                "\n\t'{0}'".format("'\n\t'".join(self.specify_input_table())) +
                "\n" + "It is:" + "\n\t'" + model_side_name)
def is_grammar_respected(self):
    """
    Check if the definition file respects the grammar. Throw a WopMarsException exception if not.

    The formal representation of the grammar is::

        WoPMaRS       = rule
        identifier    = String
        ni            = NEWLINE INDENT
        rule          = "rule" identifier ":" ruleparams
        ruleparams    = [ni tool] [ni input] [ni output] [ni params]
        filesortables = (ni files|ni tables){0-2}
        files         = "file" ":" (ni identifier ":" stringliteral)+
        tables        = "table" ":" (ni identifier ":" stringliteral)+
        tool          = "tool" ":" stringliteral
        input         = "input" ":" ni filesortables
        output        = "output" ":" ni filesortables
        params        = "params" ":" (ni identifier ":" stringliteral)+
        (NEWLINE WoPMaRS)+

    :raises WopMarsException: The grammar is not respected
    """
    exemple_file_def = """
rule RULENAME:
    tool: TOOLNAME
    input:
        file:
            INPUTNAME: INPUTVALUE
        table:
            - path.to.table
    output:
        file:
            OUTPUTNAME: OUTPUTVALUE
        table:
            - path.to.table
    params:
        OPTIONNAME: OPTIONVALUE

rule ...etc...
    """
    # recognize the rule blocks
    regex_step1 = re.compile(r"(^rule [^\s]+$)")
    # recognize the elements of the rule
    regex_step2 = re.compile(r"(^params$)|(^tool$)|(^input$)|(^output$)")
    # recognize the file/table blocks
    regex_step3 = re.compile(r"(^file$)|(^table$)")
    # The words found are tested against the regex to see if they match or not
    for s_key_step1 in self.__dict_workflow_definition:
        bool_toolwrapper = False
        # The first level of indentation should only contain rules
        if not regex_step1.search(s_key_step1):
            raise WopMarsException(
                "Error while parsing the configuration file: \n\t"
                "The grammar of the WopMars's definition file is not respected:",
                "The line containing:\'" + str(s_key_step1) +
                "\' doesn't match the grammar: it should start with 'rule'" +
                "and contains only one word after the 'rule' keyword" +
                "\nexemple:" + exemple_file_def)
        for s_key_step2 in self.__dict_workflow_definition[s_key_step1]:
            # the second level of indentation should only contain elements of rule
            if not regex_step2.search(s_key_step2):
                raise WopMarsException(
                    "Error while parsing the configuration file: \n\t"
                    "The grammar of the WopMars's definition file is not respected:",
                    "The line containing:'" + str(s_key_step2) + "'" +
                    " for rule '" + str(s_key_step1) + "'" +
                    " doesn't match the grammar: it should be " +
                    "'tool', 'params', 'input' or 'output'" +
                    "\nexemple:" + exemple_file_def)
            elif s_key_step2 == "input" or s_key_step2 == "output":
                for s_key_step3 in self.__dict_workflow_definition[s_key_step1][s_key_step2]:
                    # the third level of indentation should only contain file/table blocks
                    if not regex_step3.search(s_key_step3):
                        raise WopMarsException(
                            "Error while parsing the configuration file: \n\t"
                            "The grammar of the WopMars's definition file is not respected:",
                            "The line containing:'" + str(s_key_step3) + "'" +
                            " for rule '" + str(s_key_step1) + "'" +
                            " doesn't match the grammar: it should be " +
                            "'file' or 'table'" +
                            "\nexemple:" + exemple_file_def)
                    elif s_key_step3 == "file":
                        # each file entry must map a variable name to a path string
                        for s_variable_name in self.__dict_workflow_definition[s_key_step1][s_key_step2][s_key_step3]:
                            if not isinstance(self.__dict_workflow_definition[s_key_step1][s_key_step2][s_key_step3][s_variable_name], str):
                                raise WopMarsException(
                                    "Error while parsing the configuration file: \n\t" +
                                    "The grammar of the WopMars's definition file is not respected:",
                                    "The line containing:'" + str(s_variable_name) + "'" +
                                    " for rule '" + str(s_key_step1) + "'" +
                                    " doesn't match the grammar: it should be the string containing the path to the file."
                                    "\nexemple:" + exemple_file_def)
                    elif s_key_step3 == "table":
                        # each table entry must be the string name of a Model
                        for s_tablename in self.__dict_workflow_definition[s_key_step1][s_key_step2][s_key_step3]:
                            if not isinstance(s_tablename, str):
                                # BUG FIX: the original referenced s_variable_name here,
                                # a stale (or unbound) variable from the 'file' branch
                                raise WopMarsException(
                                    "Error while parsing the configuration file: \n\t"
                                    "The grammar of the WopMars's definition file is not respected:",
                                    "The line containing:'" + str(s_tablename) + "'" +
                                    " for rule '" + str(s_key_step1) + "'" +
                                    " doesn't match the grammar: it should be the string containing the name of the Model."
                                    "\nexemple:" + exemple_file_def)
            # There should be one tool at max in each rule
            elif s_key_step2 == "tool":
                if bool_toolwrapper:
                    raise WopMarsException(
                        "Error while parsing the configuration file: \n\t",
                        "There is multiple tools specified for the " + str(s_key_step1))
                bool_toolwrapper = True
        # All rules should contain a tool
        if not bool_toolwrapper:
            raise WopMarsException(
                "Error while parsing the configuration file: \n\t"
                "The grammar of the WopMars's definition file is not respected:",
                "The rule '" + str(s_key_step1) + "' doesn't contain any tool." +
                "\nexemple:" + exemple_file_def)