示例#1
0
    def notify_success(self, thread_toolwrapper):
        """
        Handle thread_toolwrapper success by continuing the dag.

        :param thread_toolwrapper: ToolWrapper thread that just succeed
        :type thread_toolwrapper: :class:`~.wopmars.management.ToolWrapperThread.ToolWrapperThread`
        """
        self.__session.add(thread_toolwrapper.get_toolwrapper())
        self.__session.commit()

        dry_status = thread_toolwrapper.get_dry()
        # if not OptionManager.instance()["--dry-run"]:
        #     thread_toolwrapper.get_toolwrapper().set_args_time_and_size("output", dry_status)
        if dry_status is False and not OptionManager.instance()["--dry-run"]:
            Logger.instance(
            ).info("ToolWrapper {} -> {} has succeeded.".format(
                str(thread_toolwrapper.get_toolwrapper().rule_name),
                str(thread_toolwrapper.get_toolwrapper().__class__.__name__)))
        # Continue the dag execution from the tool_python_path that just finished.
        self.__already_runned.add(thread_toolwrapper.get_toolwrapper())
        self.__count_exec -= 1

        if len(self.__list_queue_buffer):
            Logger.instance().debug("Fill the queue with the buffer: " + str([
                t.get_toolwrapper().__class__.__name__
                for t in self.__list_queue_buffer
            ]))
        # BUG FIX: the original deleted items from __list_queue_buffer while
        # iterating over it (del buffer[i] with a manually advanced index),
        # which skips every other element and leaves part of the buffer
        # unqueued. Enqueue everything first, then empty the buffer.
        for tw_thread in self.__list_queue_buffer:
            self.__queue_exec.put(tw_thread)
        del self.__list_queue_buffer[:]

        self.execute_from(thread_toolwrapper.get_toolwrapper())
示例#2
0
文件: DAG.py 项目: aitgon/wopmars
    def __init__(self, set_tools=None):
        """
        The DAG can be built from a set of tools, analyzing the successors of each of them.

        ToolWrappers have a method "follows()" which allows to know if one tool has a
        dependency on another. The tools of set_tools are compared pairwise to extract
        the dependencies.

        :param set_tools: A set of tools
        """
        # the DAG is a DiGraph
        super().__init__()
        # A nx digraph to store the dot graph
        self.dot_digraph = nx.DiGraph()
        Logger.instance().info("Building the execution DAG...")
        if set_tools:
            # The original performed the full pairwise follows() comparison
            # twice (once for the execution graph, once for the dot graph);
            # a single pass builds both and halves the number of follows()
            # calls.
            for tool1 in set_tools:
                self.add_node(tool1)
                self.dot_digraph.add_node(tool1.dot_label())
                # for each other tool
                for tool2 in set_tools.difference(set([tool1])):
                    # is there a dependency between tool1 and tool2?
                    if tool1.follows(tool2):
                        self.add_edge(tool2, tool1)
                        self.dot_digraph.add_edge(tool2.dot_label(), tool1.dot_label())
        Logger.instance().debug("DAG built.")
示例#3
0
    def execute_from(self, tw=None):
        """
        Execute the dag from the given node.

        The next nodes are taken thanks to the "successors()" method of the DAG and are put into the queue.
        The "run_queue()" is then called.

        A trace of the already_runned ToolWrapper objects is kept in order to avoid duplicate execution.

        :param tw: A node of the DAG or None, if it needs to be executed from the root.
        :type tw: :class:`~.wopmars.framework.database.models.ToolWrapper.ToolWrapper`
        :return: void
        """
        # the first list will be the root nodes
        list_tw = self.__dag_to_exec.successors(tw)
        Logger.instance().debug("Next tools: " +
                                str([t.__class__.__name__ for t in list_tw]))

        # FIX: use a distinct loop variable instead of shadowing the `tw`
        # parameter; also fixed the docstring, which documented a
        # non-existent `node` parameter.
        for next_tw in list_tw:
            # every rule should be executed once and only once
            if next_tw not in self.__already_runned:
                # ToolWrapperThread object is a thread ready to start
                self.__queue_exec.put(ToolWrapperThread(next_tw))
            else:
                Logger.instance().debug("ToolWrapper: " + next_tw.rule_name +
                                        " -> " + next_tw.tool_python_path +
                                        " has already been executed. Pass.")
        self.run_queue()
示例#4
0
    def execute_from(self, tw=None):
        """
        Execute the dag from the given node.

        The next nodes are taken thanks to the "successors()" method of the DAG and are put into the queue.
        The "run_queue()" is then called.

        A trace of the already_runned ToolWrapper objects is kept in order to avoid duplicate execution.

        :param tw: A node of the DAG or None, if it needs to be executed from the root.
        :type tw: :class:`~.wopmars.framework.database.tables.ToolWrapper.ToolWrapper`
        :return: void
        """
        # the first list will be the root nodes
        list_tw = self.__dag_to_exec.successors(tw)
        Logger.instance().debug("Next tools: " + str([t.__class__.__name__ for t in list_tw]))

        # FIX: distinct loop variable; the original shadowed the `tw`
        # parameter. The docstring also documented a non-existent `node`
        # parameter.
        for next_tw in list_tw:
            # every rule should be executed once and only once
            if next_tw not in self.__already_runned:
                # ToolThread object is a thread ready to start
                self.__queue_exec.put(ToolThread(next_tw))
            else:
                Logger.instance().debug("Rule: " + next_tw.name +
                                        " -> " + next_tw.toolwrapper +
                                        " has already been executed. Pass.")
        self.run_queue()
示例#5
0
 def run(self):
     """Touch the two declared output files in parallel, then pause briefly."""
     Logger.instance().info(self.__class__.__name__ + " is running...")
     processes = [subprocess.Popen(["touch", self.output_file(key)])
                  for key in ("output1", "output2")]
     for process in processes:
         process.wait()
     time.sleep(1)
示例#6
0
    def notify_success(self, thread_toolwrapper):
        """
        Handle thread_toolwrapper success by continuing the dag.

        :param thread_toolwrapper: ToolWrapper thread that just succeed
        :type thread_toolwrapper: :class:`~.wopmars.management.ToolThread.ToolThread`
        """
        self.__session.add(thread_toolwrapper.get_toolwrapper())
        self.__session.commit()

        dry_status = thread_toolwrapper.get_dry()
        # if not OptionManager.instance()["--dry-run"]:
        #     thread_toolwrapper.get_toolwrapper().set_args_time_and_size("output", dry_status)
        if dry_status is False and not OptionManager.instance()["--dry-run"]:
            Logger.instance().info("Rule " + str(thread_toolwrapper.get_toolwrapper().name) + " -> " + str(thread_toolwrapper.get_toolwrapper().__class__.__name__) + " has succeed.")
        # Continue the dag execution from the toolwrapper that just finished.
        self.__already_runned.add(thread_toolwrapper.get_toolwrapper())
        self.__count_exec -= 1

        if len(self.__list_queue_buffer):
            Logger.instance().debug("Fill the queue with the Buffer: " +
                                    str([t.get_toolwrapper().__class__.__name__ for t in self.__list_queue_buffer]))
        # BUG FIX: the original removed elements from __list_queue_buffer
        # while iterating over it, which skipped every other element and left
        # threads stuck in the buffer. Enqueue them all, then clear the list.
        for tw_thread in self.__list_queue_buffer:
            self.__queue_exec.put(tw_thread)
        del self.__list_queue_buffer[:]

        self.execute_from(thread_toolwrapper.get_toolwrapper())
示例#7
0
    def run(self):
        """
        Get the dag then execute it.

        The database is setUp here if workflow side tables have not been created yet.

        The dag is taken thanks to the :meth:`~.wopmars.framework.parsing.Parser.Parser.parse` method of the parser. And then pruned by the :meth:`~.wopmars.framework.management.WorkflowManager.WorkflowManager.get_dag_to_exec` method
        which will set the right DAG to be executed.
        Then, :meth:`~.wopmars.framework.management.WorkflowManager.WorkflowManager.execute_from` is called with no argument to get the origin nodes.
        """

        # This create_all is supposed to only create workflow-management side tables (called "wom_*")
        SQLManager.instance().create_all()

        # --clear-history wipes the wom_* bookkeeping tables before running.
        if OptionManager.instance()["--clear-history"]:
            Logger.instance().info("Deleting WoPMaRS history...")
            SQLManager.instance().drop_table_content_list(SQLManager.wom_table_names)

        # The following lines allow to create types 'input' and 'output' in the db if they don't exist.
        self.__session.get_or_create(Type, defaults={"id": 1}, name="input")
        self.__session.get_or_create(Type, defaults={"id": 2}, name="output")
        self.__session.commit()
        # Get the DAG representing the whole workflow
        self.__dag_tools = self.__parser.parse()
        # Build the DAG which is willing to be executed according
        self.get_dag_to_exec()
        # --forceall (without --dry-run) erases previous outputs so every rule
        # re-runs from scratch.
        if OptionManager.instance()["--forceall"] and not OptionManager.instance()["--dry-run"]:
            self.erase_output()
        # Start the execution at the root nodes
        self.execute_from()
示例#8
0
    def __init__(self, set_tools=None):
        """
        The DAG can be built from a set of tools, analyzing the successors of each of them.

        ToolWrappers have a method "follows()" which allows to know if one tool has a
        dependency on another. The tools of set_tools are compared pairwise to extract
        the dependencies.

        :param set_tools: A set of tools
        """
        # the DAG is a DiGraph
        super().__init__()
        # A nx digraph to store the dot graph
        self.dot_digraph = nx.DiGraph()
        Logger.instance().info("Building the execution DAG...")
        if set_tools:
            # Merge the two originally duplicated pairwise loops into one:
            # each follows() comparison now feeds both the execution graph
            # and the dot graph in a single pass.
            for tool1 in set_tools:
                self.add_node(tool1)
                self.dot_digraph.add_node(tool1.dot_label())
                # for each other tool
                for tool2 in set_tools.difference(set([tool1])):
                    # is there a dependency between tool1 and tool2?
                    if tool1.follows(tool2):
                        self.add_edge(tool2, tool1)
                        self.dot_digraph.add_edge(tool2.dot_label(),
                                                  tool1.dot_label())
        Logger.instance().debug("DAG built.")
示例#9
0
    def run(self):
        """
        Get the dag then execute it.

        The database is setUp here if workflow side tables have not been created yet.

        The dag is taken thanks to the :meth:`~.wopmars.framework.parsing.Parser.Parser.parse` method of the parser. And then pruned by the :meth:`~.wopmars.framework.management.WorkflowManager.WorkflowManager.get_dag_to_exec` method
        which will set the right DAG to be executed.
        Then, :meth:`~.wopmars.framework.management.WorkflowManager.WorkflowManager.execute_from` is called with no argument to get the origin nodes.
        """

        # This create_all is supposed to only create workflow-management side tables (called "wom_*")
        SQLManager.instance().create_all()

        # Optionally wipe the wom_* bookkeeping tables before running.
        if OptionManager.instance()["--clear-history"]:
            Logger.instance().info("Deleting WoPMaRS history...")
            SQLManager.instance().drop_table_content_list(SQLManager.wom_table_names)

        # The following lines allow to create types 'input' and 'output' in the db if they don't exist.
        self.__session.get_or_create(Type, defaults={"id": 1}, name="input")
        self.__session.get_or_create(Type, defaults={"id": 2}, name="output")
        self.__session.commit()
        # Get the DAG representing the whole workflow
        self.__dag_tools = self.__parser.parse()
        # Build the DAG which is willing to be executed according
        self.get_dag_to_exec()
        # Forced execution (without dry-run) erases previous outputs first so
        # every rule re-runs from scratch.
        if OptionManager.instance()["--forceall"] and not OptionManager.instance()["--dry-run"]:
            self.erase_output()
        # Start the execution at the root nodes
        self.execute_from()
示例#10
0
    def is_ready(self):
        """
        Check whether the file exists on the file system.

        :return: boolean: True if it exists, false if not
        """
        message = "Checking if " + self.file_key + " is ready: " + self.path
        Logger.instance().debug(message)
        return os.path.isfile(self.path)
示例#11
0
    def delete_content(self, table):
        """
        Delete every row of a given table.

        :param table: A mapper object representing the table whose content should be deleted.
        """
        tablename = table.__tablename__
        Logger.instance().debug("Deleting content of table " + tablename + "...")
        self.__manager.execute(self.__session, table.__table__.delete())
        Logger.instance().debug("Content of table " + tablename + " deleted.")
示例#12
0
 def run(self):
     """Touch output1 and insert ten FooBaseP rows, then commit and pause."""
     Logger.instance().info(self.__class__.__name__ + " is running...")
     touch_process = subprocess.Popen(["touch", self.output_file("output1")])
     touch_process.wait()
     for index in range(10):
         row = self.output_table("FooBaseP")(name="Foowrapper5 - " + str(index))
         self.session.add(row)
     self.session.commit()
     time.sleep(1)
示例#13
0
 def run(self):
     """Touch the three declared output files one after another, then pause."""
     Logger.instance().info(self.__class__.__name__ + " is running...")
     for key in ("output1", "output2", "output3"):
         process = subprocess.Popen(["touch", self.output_file(key)])
         process.wait()
     time.sleep(0.1)
示例#14
0
文件: IODbPut.py 项目: aitgon/wopmars
    def import_models(model_names):
        """
        Import every model given by its dotted path.

        :param model_names: The path to the models
        :type model_names: Iterable(String)
        """
        for model_name in model_names:
            Logger.instance().debug("IODbPut.import_models: importing " + str(model_name))
            importlib.import_module(model_name)
示例#15
0
 def run(self):
     """Copy every FooBaseP row into FooBase2P after clearing the target table."""
     Logger.instance().info(self.__class__.__name__ + " is running...")
     source_rows = self.session.query(self.input_table("FooBaseP")).all()
     self.session.delete_content(self.output_table("FooBase2P"))
     for source_row in source_rows:
         copy = self.output_table("FooBase2P")(name=source_row.name)
         self.session.add(copy)
     Logger.instance().info(
         self.session.query(self.output_table("FooBase2P")).all())
     time.sleep(1)
示例#16
0
 def run(self):
     """Touch output1 and insert 1000 FooBase rows, then commit."""
     Logger.instance().info(self.__class__.__name__ + " is running...")
     touch_process = subprocess.Popen(["touch", self.output_file("output1")])
     touch_process.wait()
     for index in range(1000):
         entry = self.output_table("FooBase")(name="Foowrapper5 - {}".format(index))
         self.session.add(entry)
     self.session.commit()
示例#17
0
 def rollback(self):
     """
     Rollback changes on the database. Should be used in case of error.
     :return:
     """
     pending_updates = "\n\t\t".join([str(entry) for entry in self.__session.dirty])
     pending_inserts = "\n\t\t".join([str(entry) for entry in self.__session.new])
     Logger.instance().debug("Operations to be rollbacked in session" + str(self.__session) +
                             ": \n\tUpdates:\n\t\t" + pending_updates +
                             "\n\tInserts:\n\t\t" + pending_inserts)
     # call on SQLManager rollback method to use the lock
     self.__manager.rollback(self.__session)
示例#18
0
    def execute(self, statement, *args, **kwargs):
        """
        Execute a statement of the given values.

        :param statement: SQLAlchemy statement (insert, delete, etc..)
        :param args: dict of values
        :param kwargs: exploded dict of values
        :return: return the result of the execution of the statement
        """
        debug_message = ("WopmarsSession.execute(" + str(statement) + ", " +
                         str(args) + ", " + str(kwargs) + ")")
        Logger.instance().debug(debug_message)
        return self.__manager.execute(self.__session, statement, *args, **kwargs)
示例#19
0
 def drop_all(self):
     """
     Use the declarative Base to drop all tables found. Should only be used for testing purposes.
     """
     # Acquire before entering the try block: if acquire_write() itself fails
     # we must not release a lock we never obtained (the original released
     # unconditionally in the finally clause).
     self.__lock.acquire_write()
     try:
         Logger.instance().debug("Dropping all tables...")
         Base.metadata.drop_all(self.__engine)
     finally:
         # Always release the lock
         self.__lock.release()
示例#20
0
 def drop_all(self):
     """
     Use the declarative Base to drop all tables found. Should only be used for testing purposes.
     """
     # Take the write lock before the try so a failed acquire does not lead
     # to releasing a lock that was never held.
     self.__lock.acquire_write()
     try:
         Logger.instance().debug("Dropping all tables...")
         Base.metadata.drop_all(self.__engine)
     finally:
         # Always release the lock
         self.__lock.release()
示例#21
0
    def import_models(model_names):
        """
        Import all the given models.

        :param model_names: The path to the models
        :type model_names: Iterable(String)
        """
        for dotted_path in model_names:
            Logger.instance().debug(
                "IODbPut.import_models: importing " + str(dotted_path))
            importlib.import_module(dotted_path)
示例#22
0
 def commit(self):
     """
     Validate changes on the database. Should be used when everything is ok.
     """
     if self.something():
         Logger.instance().debug(str(self.__session) + " is about to commit.")
         pending_updates = "\n\t\t".join([str(entry) for entry in self.__session.dirty])
         pending_inserts = "\n\t\t".join([str(entry) for entry in self.__session.new])
         Logger.instance().debug("Operations to be commited in session" + str(self.__session) +
                                 ": \n\tUpdates:\n\t\t" + pending_updates +
                                 "\n\tInserts:\n\t\t" + pending_inserts)
         # call on SQLManager commit method to use the lock
         self.__manager.commit(self.__session)
示例#23
0
文件: IODbPut.py 项目: aitgon/wopmars
    def __init__(self, model, tablename):
        """
        self.__table is initialized to None and will later hold the model class of this IODbPut object.

        :param model: The path to the model
        :type model: str
        :param tablename: The name of the table associated with the model
        :type tablename: str
        """
        # The file containing the table should be in PYTHONPATH
        Base.__init__(self, model=model, tablename=tablename)
        message = str(model) + " model loaded. Tablename: " + str(tablename)
        Logger.instance().debug(message)
        self.__table = None
示例#24
0
    def create_all(self):
        """
        Use the declarative Base to create all tables found.

        If you want to create a table, you should be sure that it has been imported first.
        """
        # Acquire before entering the try block: if acquire_write() itself
        # fails we must not release a lock we never obtained. This also fixes
        # the misplaced "release" comment that sat inside the try body.
        self.__lock.acquire_write()
        try:
            Logger.instance().debug("Creating all tables...")
            Base.metadata.create_all(self.__engine)
        finally:
            # Always release the lock
            self.__lock.release()
示例#25
0
    def create_all(self):
        """
        Use the declarative Base to create all tables found.

        If you want to create a table, you should be sure that it has been imported first.
        """
        # Take the write lock before the try so a failed acquire does not
        # lead to releasing a lock that was never held.
        self.__lock.acquire_write()
        try:
            Logger.instance().debug("Creating all tables...")
            Base.metadata.create_all(self.__engine)
        finally:
            # Always release the lock
            self.__lock.release()
示例#26
0
    def create(self, tablename):
        """
        Use the declarative Base to create a table from its tablename.

        The tablename is the name of the base represented in the database (independent of the Table model)
        :param tablename:
        """
        # Acquire before the try block: if acquire_write() itself fails we
        # must not release a lock we never obtained. Docstring fixed: it
        # read "is_input" / "databse" instead of "name" / "database".
        self.__lock.acquire_write()
        try:
            Logger.instance().debug("SQLManager.create(" + str(tablename) + "): create table " + str(tablename))
            Base.metadata.tables[tablename.split(".")[-1]].create(self.engine, checkfirst=True)
        finally:
            # Always release the lock
            self.__lock.release()
示例#27
0
文件: DAG.py 项目: aitgon/wopmars
    def write_dot(self, path):
        """
        Build the dot file.

        The .ps can be built from the dot file with the command line: "dot -Tps (unknown).dot - o (unknown).ps"
        """
        # To build .ps : dot -Tps (unknown).dot - o (unknown).ps
        nx.draw(self.dot_digraph)
        write_dot(self.dot_digraph, path)
        # building the openable file:
        path_parts = path.rsplit("/", 1)
        ps_path = path_parts[0] + "/" + path_parts[1].split(".")[-2] + ".ps"
        list_popen = ["dot", "-Tps", path, "-o", ps_path]
        Logger.instance().debug("SubProcess command line for .ps file: " + str(list_popen))
        dot_process = subprocess.Popen(list_popen)
        dot_process.wait()
示例#28
0
    def erase_output(self):
        """
        Erase the outputs of the DAG that will be executed in order to prevents conflicts.
        """
        list_tw = self.__dag_to_exec.nodes()
        set_files = set()
        set_tables = set()

        Logger.instance().info("Forced execution implies overwrite existing output. Erasing files and tables.")
        # Collect output files and tables with plain loops instead of the
        # original side-effect list comprehensions (which built throwaway
        # lists of None and used non-standard 3-space indentation).
        for tw in list_tw:
            for f in tw.files:
                if f.type.name == "output":
                    set_files.add(f.path)
            for t in tw.tables:
                if t.type.name == "output":
                    set_tables.add(t.tablename)

        s = ""
        for f_path in set_files:
            s += "\n" + f_path
            PathFinder.silentremove(f_path)
        Logger.instance().debug("Removed files:" + s)

        SQLManager.instance().drop_table_content_list(
            set(IODbPut.tablenames).intersection(set_tables))

        s = "\n"
        s += "\n".join(set_tables)
        Logger.instance().debug("Removed tables content:" + s)

        Logger.instance().info("Output files and tables from previous execution have been erased.")
示例#29
0
文件: IODbPut.py 项目: aitgon/wopmars
 def init_on_load(self):
     """
     This is used by SQLAlchemy to regenerate the right object when loading it from the database. Here, we need to
     get back the actual Model from the model name and store it in self.__table.
     """
     for table in IODbPut.tablemodelnames:
         mod = importlib.import_module(table)
         try:
             if table == self.model:
                 # getattr replaces the original eval() on a dynamically
                 # built string: same AttributeError on a missing class,
                 # without evaluating arbitrary code.
                 self.__table = getattr(mod, self.model.split(".")[-1])
         except AttributeError as e:
             raise e
     Logger.instance().debug(self.tablename + " table class reloaded. Model: " + self.model)
示例#30
0
    def __init__(self, model, tablename):
        """
        self.__table starts as None and will later hold the model class of this IODbPut object.

        :param model: The path to the model
        :type model: str
        :param tablename: The name of the table associated with the model
        :type tablename: str
        """
        # The file containing the table should be in PYTHONPATH
        Base.__init__(self, model=model, tablename=tablename)
        Logger.instance().debug(str(model) + " model loaded. Tablename: " + str(tablename))
        self.__table = None
示例#31
0
    def erase_output(self):
        """
        Erase the outputs of the DAG that will be executed in order to prevents conflicts.
        """
        list_tw = self.__dag_to_exec.nodes()
        set_files = set()
        set_tables = set()

        Logger.instance().info("Forced execution implies overwrite existing output. Erasing files and tables.")
        # Plain loops replace the original side-effect list comprehensions,
        # which allocated throwaway lists of None and were indented with a
        # non-standard 3 spaces.
        for tw in list_tw:
            for f in tw.files:
                if f.type.name == "output":
                    set_files.add(f.path)
            for t in tw.tables:
                if t.type.name == "output":
                    set_tables.add(t.tablename)

        s = ""
        for f_path in set_files:
            s += "\n" + f_path
            PathFinder.silentremove(f_path)
        Logger.instance().debug("Removed files:" + s)

        SQLManager.instance().drop_table_content_list(
            set(IODbPut.tablenames).intersection(set_tables))

        s = "\n"
        s += "\n".join(set_tables)
        Logger.instance().debug("Removed tables content:" + s)

        Logger.instance().info("Output files and tables from previous execution have been erased.")
示例#32
0
    def create(self, tablename):
        """
        Use the declarative Base to create a table from its tablename.

        The tablename is the name of the base represented in the database (independent of the Table model)
        :param tablename:
        """
        # Acquire before the try block: if acquire_write() itself fails we
        # must not release a lock we never obtained.
        self.__lock.acquire_write()
        try:
            Logger.instance().debug("SQLManager.create(" + str(tablename) + "): create table " + str(tablename))
            Base.metadata.tables[tablename.split(".")[-1]].create(self.__engine, checkfirst=True)
        finally:
            # Always release the lock
            self.__lock.release()
示例#33
0
    def get_dag_to_exec(self):
        """
        Set the dag to exec in terms of --sourcerule option and --targetrule option.

        The source rule is checked first (there should not be both set because of the checks at the begining of the software)

        If sourcerule is set, then it is its successors that are searched in the whole dag.
        Else, it is its predecessors.

        The set of obtained rules are used to build the "dag_to_exec". The nodes returned by get_all_successors and
        get_all_predecessors are implicitly all related.
        """
        if OptionManager.instance()["--sourcerule"] is not None:
            try:
                # Get the rule asked by the user as 'sourcerule'
                node_from_rule = [n for n in self.__dag_tools if n.name == OptionManager.instance()["--sourcerule"]][0]
            except IndexError:
                raise WopMarsException(
                    "The given rule to start from: " + OptionManager.instance()["--sourcerule"] + " doesn't exist.")

            self.__dag_to_exec = DAG(self.__dag_tools.get_all_successors(node_from_rule))
            Logger.instance().info("Running the workflow from rule " + str(OptionManager.instance()["--sourcerule"]) +
                                   " -> " + node_from_rule.toolwrapper)
        elif OptionManager.instance()["--targetrule"] is not None:
            try:
                # Get the rule asked by the user as 'targetrule'
                node_from_rule = [n for n in self.__dag_tools if n.name == OptionManager.instance()["--targetrule"]][0]
            except IndexError:
                raise WopMarsException(
                    "The given rule to go to: " + OptionManager.instance()["--targetrule"] + " doesn't exist.")
            self.__dag_to_exec = DAG(self.__dag_tools.get_all_predecessors(node_from_rule))
            Logger.instance().info("Running the workflow to the rule " + str(OptionManager.instance()["--targetrule"]) +
                                   " -> " + node_from_rule.toolwrapper)
        else:
            self.__dag_to_exec = self.__dag_tools

        # Gather every table used by the rules that will actually run, with a
        # plain loop instead of the original side-effect list comprehension.
        tables = []
        for tw in self.__dag_to_exec.nodes():
            tables.extend(tw.tables)
        IODbPut.set_tables_properties(tables)

        # For the tools that are in the workflow definition file but not in the executed dag, their status is set to
        # "NOT_PLANNED"
        for tw in set(self.__dag_tools.nodes()).difference(set(self.__dag_to_exec.nodes())):
            tw.set_execution_infos(status="NOT_PLANNED")
            self.__session.add(tw)

        self.__session.commit()
示例#34
0
    def get_dag_to_exec(self):
        """
        Set the dag to exec in terms of --sourcerule option and --targetrule option.

        The source rule is checked first (there should not be both set because of the checks at the begining of the software)

        If sourcerule is set, then it is its successors that are searched in the whole dag.
        Else, it is its predecessors.

        The set of obtained rules are used to build the "dag_to_exec". The nodes returned by get_all_successors and
        get_all_predecessors are implicitly all related.
        """
        if OptionManager.instance()["--sourcerule"] is not None:
            try:
                # Get the rule asked by the user as 'sourcerule'
                node_from_rule = [n for n in self.__dag_tools if n.name == OptionManager.instance()["--sourcerule"]][0]
            except IndexError:
                raise WopMarsException(
                    "The given rule to start from: " + OptionManager.instance()["--sourcerule"] + " doesn't exist.")

            self.__dag_to_exec = DAG(self.__dag_tools.get_all_successors(node_from_rule))
            Logger.instance().info("Running the workflow from rule " + str(OptionManager.instance()["--sourcerule"]) +
                                   " -> " + node_from_rule.toolwrapper)
        elif OptionManager.instance()["--targetrule"] is not None:
            try:
                # Get the rule asked by the user as 'targetrule'
                node_from_rule = [n for n in self.__dag_tools if n.name == OptionManager.instance()["--targetrule"]][0]
            except IndexError:
                raise WopMarsException(
                    "The given rule to go to: " + OptionManager.instance()["--targetrule"] + " doesn't exist.")
            self.__dag_to_exec = DAG(self.__dag_tools.get_all_predecessors(node_from_rule))
            Logger.instance().info("Running the workflow to the rule " + str(OptionManager.instance()["--targetrule"]) +
                                   " -> " + node_from_rule.toolwrapper)
        else:
            self.__dag_to_exec = self.__dag_tools

        # Collect all tables referenced by the rules that will run (plain
        # loop instead of the original side-effect list comprehension).
        tables = []
        for tw in self.__dag_to_exec.nodes():
            tables.extend(tw.tables)
        IODbPut.set_tables_properties(tables)

        # For the tools that are in the workflow definition file but not in the executed dag, their status is set to
        # "NOT_PLANNED"
        for tw in set(self.__dag_tools.nodes()).difference(set(self.__dag_to_exec.nodes())):
            tw.set_execution_infos(status="NOT_PLANNED")
            self.__session.add(tw)

        self.__session.commit()
示例#35
0
    def drop_table_content_list(self, list_str_table):
        """
        Remove a list of tables from the list of their tablenames.

        :param list_str_table: [String] the name of the tables.
        """
        session = self.get_session()
        # Resolve the Table objects from the tablenames, sort them according
        # to their relationships / foreign keys, and delete in reverse
        # creation order so dependent rows are removed first.
        tables = [Base.metadata.tables[name.split(".")[-1]] for name in list_str_table]
        for table in reversed(sort_tables(tables)):
            Logger.instance().debug(
                "SQLManager.drop_table_content_list(" + str(list_str_table) + "): drop table content " + str(table.name))
            self.execute(session._session(), table.delete())
示例#36
0
    def drop_table_content_list(self, list_str_table):
        """
        Remove a list of tables from the list of their tablenames.

        :param list_str_table: [String] the name of the tables.
        """
        session = self.get_session()
        # Get the list of Table objects from tablenames, then sort them according to their relationships / foreignkeys
        # and take the reverse to delete them in the right order (reverse of the correct order for creating them)
        list_obj_table = reversed(
            sort_tables([Base.metadata.tables[tablename.split(".")[-1]] for tablename in list_str_table]))
        for t in list_obj_table:
            Logger.instance().debug(
                "SQLManager.drop_table_content_list(" + str(list_str_table) + "): drop table content " + str(t.name))
            self.execute(session._session(), t.delete())
示例#37
0
 def init_on_load(self):
     """
     Rebuild the model class after SQLAlchemy loads this object from the database.

     SQLAlchemy cannot persist the Python class itself, so on load we scan the
     known table model modules and rebind self.__table to the class whose
     qualified name matches self.model.

     :raise AttributeError: if the matching module does not define the class.
     """
     for table in IODbPut.tablemodelnames:
         mod = importlib.import_module(table)
         if table == self.model:
             # todo tabling
             # getattr replaces the former eval("mod." + ...): same attribute
             # lookup, same AttributeError on a missing class, without the
             # risks of eval. The previous try/except AttributeError only
             # re-raised the exception unchanged, so it was dead code.
             self.__table = getattr(mod, self.model.split(".")[-1])
     Logger.instance().debug(self.tablename +
                             " table class reloaded. Model: " + self.model)
示例#38
0
    def pandas_read_sql(self, *args, **kwargs):
        """
        Run `pandas.read_sql <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql.html?highlight=sql>`_ while holding the SQLManager read lock.

        :param args: positional arguments forwarded to pandas.read_sql.
        :param kwargs: keyword arguments forwarded to pandas.read_sql.
        :return DataFrame: the dataframe holding the query results.
        """
        try:
            self.__lock.acquire_read()
            dataframe = pandas.read_sql(*args, **kwargs)
            Logger.instance().debug(
                "SQLManager.read_sql: Reading database using pandas")
        finally:
            # The lock is released whether the read succeeded or not.
            self.__lock.release()
        return dataframe
示例#39
0
    def test_commit(self):
        """
        Stress concurrent commits: start six worker threads, wait for them all,
        then check the expected number of FooBase rows is present.
        """
        list_threads = [
            self.__t1, self.__t2, self.__t3, self.__t4, self.__t5, self.__t6
        ]
        try:
            for t in list_threads:
                t.start()

            for t in list_threads:
                t.join()
        except Exception as e:
            # NOTE(review): the failure is only logged, so the test would not
            # fail at this point -- consider self.fail(); confirm the intent.
            Logger.instance().error("Should not raise an exception")
        # Presumably the six threads insert 500 rows each (3000 total) named
        # 'string ...' -- verify against the fixture set up elsewhere.
        self.assertTrue(
            len(
                self.__local_session.query(FooBase).filter(
                    FooBase.name.like('string %')).all()) == 3000)
示例#40
0
    def pandas_to_sql(self, df, *args, **kwargs):
        """
        Run `DataFrame.to_sql <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html>`_ while holding the SQLManager write lock.

        :param df: The DataFrame to insert in database.
        :type df: `pandas.DataFrame <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html>`_
        :param args: positional arguments forwarded to pandas.DataFrame.to_sql.
        :param kwargs: keyword arguments forwarded to pandas.DataFrame.to_sql.
        """
        try:
            self.__lock.acquire_write()
            df.to_sql(*args, **kwargs)
            Logger.instance().debug(
                "SQLManager.pandas_to_sql: Adding dataframe to database")
        finally:
            # The lock is released whether the write succeeded or not.
            self.__lock.release()
 def init_on_load(self):
     """
     Rebuild the model class after SQLAlchemy loads this object from the database.

     SQLAlchemy cannot persist the Python class itself, so on load we scan the
     known table model modules and rebind self.__table to the class whose
     qualified name matches self.model_py_path.

     :raise AttributeError: if the matching module does not define the class.
     """
     for table in TableInputOutputInformation.tablemodelnames:
         mod = importlib.import_module(table)
         if table == self.model_py_path:
             # toodo LucG tabling
             # getattr replaces the former eval("mod." + ...): same attribute
             # lookup, same AttributeError on a missing class, without the
             # risks of eval. The previous try/except AttributeError only
             # re-raised the exception unchanged, so it was dead code.
             self.__table = getattr(mod, self.model_py_path.split(".")[-1])
     Logger.instance().debug(self.table_key +
                             " table class reloaded. Model: " +
                             self.model_py_path)
示例#42
0
    def drop_table_list(self, list_str_table):
        """
        Drop the tables designated by their tablenames, holding the write lock.

        :param list_str_table: [String] the names of the tables.
        """
        # Get the list of Table objects from tablenames, then sort them according to their relationships / foreignkeys
        # and take the reverse to delete them in the right order (reverse of the correct order for creating them)
        # todo tabling
        list_obj_table = reversed(sort_tables([Base.metadata.tables[tablename.split(".")[-1]] for tablename in list_str_table]))
        try:
            self.__lock.acquire_write()
            for t in list_obj_table:
                Logger.instance().debug("SQLManager.drop_table_list(" + str(list_str_table) + "): drop table " + str(t.name))
                # NOTE(review): this variant uses self.engine while the sibling
                # implementation uses self.__engine -- confirm which is intended.
                t.drop(self.engine, checkfirst=True)
        finally:
            # Always release the lock
            self.__lock.release()
示例#43
0
文件: Parser.py 项目: aitgon/wopmars
    def get_set_toolwrappers():
        """
        Ask the database for toolwrappers of the current execution.

        The current execution is defined as the one with the highest id (it is auto_incrementing).

        :return: Set([ToolWrapper]) the set of toolwrappers of the current execution.
        """
        session = SQLManager.instance().get_session()
        # Scalar subquery asking the db for the highest execution id; it is
        # used directly in the filter below.
        execution_id = session.query(func.max(ToolWrapper.execution_id))
        Logger.instance().debug("Getting toolwrappers of the current execution. id = " + str(execution_id.one()[0]))
        # The former `except NoResultFound: raise e` was dead code:
        # Query.all() returns an empty list when nothing matches and never
        # raises NoResultFound, so the try/except has been removed.
        return set(session.query(ToolWrapper).filter(ToolWrapper.execution_id == execution_id).all())
    def __init__(self, model_py_path, table_key, table_name):
        """
        self.__table is initialized to None and will contain the model of this TableInputOutputInformation object.

        :param model_py_path: The path to the model
        :type model_py_path: str
        :param table_key: The key of the table associated with the model
        :type table_key: str
        :param table_name: The name of the table in the database
        :type table_name: str
        """
        # The file containing the table should be in PYTHONPATH
        Base.__init__(self,
                      model_py_path=model_py_path,
                      table_key=table_key,
                      table_name=table_name)
        Logger.instance().debug(
            str(model_py_path) + " model_py_path loaded. Tablename: " +
            str(table_key))
        # Set later (by init_on_load) to the actual model class.
        self.__table = None
示例#45
0
    def drop_table_list(self, list_str_table):
        """
        Drop the tables designated by their tablenames, holding the write lock.

        :param list_str_table: [String] the name of the tables.
        """
        # Resolve each tablename to its Table object, order them by their
        # relationships / foreign keys and reverse that order so dependent
        # tables are dropped first (reverse of the creation order).
        # todo tabling
        resolved_tables = [Base.metadata.tables[tablename.split(".")[-1]]
                           for tablename in list_str_table]
        try:
            self.__lock.acquire_write()
            for table in reversed(sort_tables(resolved_tables)):
                Logger.instance().debug("SQLManager.drop_table_list(" + str(list_str_table) + "): drop table " + str(table.name))
                table.drop(self.__engine, checkfirst=True)
        finally:
            # Always release the lock
            self.__lock.release()
示例#46
0
    def result_factory(self, query, method):
        """
        Return the result of the query, using the demanded method.

        Wraps the sqlalchemy Query execution methods (all, first, one,
        one_or_none, scalar, count) so that the ReadLock of WopMars is used
        instead of the one from SQLite.

        :param query: The query object, ready to be performed.
        :param method: String signifying the method which have to be used for querying database
        :return: The result of the query.
        """
        result = None
        try:
            Logger.instance().debug("Executing query on session " +
                                    str(query.session) + ": \n" + str(query) +
                                    ";")
            Logger.instance().debug("WopMarsQuery " + str(query.session) +
                                    " want the read-lock on SQLManager")
            self.__lock.acquire_read()
            Logger.instance().debug(
                "\"" + str(query.session) +
                "\" has taken the read lock on SQLManager.")
            # Dispatch to the matching method of the sqlalchemy Query
            # superclass (bypassing the WopMars override of these methods).
            if method in ("all", "one", "first", "count", "one_or_none", "scalar"):
                sqlalchemy_query = super(query.__class__, query)
                result = getattr(sqlalchemy_query, method)()
            else:
                raise WopMarsException(
                    "Error while querying the database.",
                    "Demanded operation doesn't exist: " + str(method))
        finally:
            # Always release the lock
            self.__lock.release()
            Logger.instance().debug(
                "\"" + str(query.session) +
                "\" has released the read lock on SQLManager.")
        return result
示例#47
0
文件: Reader.py 项目: aitgon/wopmars
    def check_duplicate_rules(s_workflow_file):
        """
        This method raises an exception if the workflow definition file contains duplicate rule names.

        The workflow definition file should contain rules with different name. It is therefore recommended to not
        call rules with tool names but functionality instead. Example:

            .. code-block:: yaml

                rule get_snp:
                    tool: SNPGetter
                    input:
                        file:
                            etc..
                        table:
                            etc..
                    output:
                        file:
                            etc..
                        table:
                            etc..
                    params:
                        etc..

        :param s_workflow_file: The content of the definition file
        :type s_workflow_file: str
        :raises WopMarsException: There is a duplicate rule name
        """
        Logger.instance().debug("Looking for duplicate rules...")
        # All rules are found using this regex.
        rules = re.findall(r'rule (.+?):', str(s_workflow_file))
        seen = set()
        for rule_name in rules:
            # Guard clause: a name seen twice is a duplicate -> abort.
            if rule_name in seen:
                raise WopMarsException("Error while parsing the configuration file:\n\t",
                                       "The rule " + rule_name + " is duplicated.")
            seen.add(rule_name)
        Logger.instance().debug("No Duplicate.")
示例#48
0
文件: IODbPut.py 项目: aitgon/wopmars
    def is_ready(self):
        """
        A IODbPut object is ready if its table exists and contains entries.

        :return: bool if the table is ready
        """
        session = SQLManager.instance().get_session()
        try:
            first_row = session.query(self.__table).first()
        except OperationalError as e:
            # Querying a missing table raises OperationalError.
            Logger.instance().debug("The table " + self.__table.__tablename__ + " doesn't exist.")
            return False
        except Exception as e:
            # Any other failure: undo the transaction and propagate.
            session.rollback()
            raise e
            # todo twthread
        if first_row is None:
            Logger.instance().debug("The table " + self.tablename + " is empty.")
            return False
        return True
示例#49
0
    def run(self):
        """
        Execute (or simulate) the wrapped ToolWrapper and fire success events.

        A dedicated session is opened for the toolwrapper. Then:
          - if the thread's dry flag is set, the tool is skipped and marked
            "ALREADY_EXECUTED";
          - else if --dry-run is given, execution is simulated ("DRY");
          - otherwise output directories are created and the tool is actually
            run ("EXECUTED").
        On any exception the session is rolled back, the status is set to
        "EXECUTION_ERROR" and a WopMarsException is raised.
        """

        session_tw = SQLManager.instance().get_session()
        start = time_unix_ms()
        try:
            self.__toolwrapper.set_session(session_tw)
            # if the tool need to be executed because its output doesn't exist
            if not self.__dry:
                Logger.instance().info(
                    "\n" + str(self.__toolwrapper) + "\n" + "command line: \n\t" + self.get_command_line())
                # if you shouldn't simulate
                if not OptionManager.instance()["--dry-run"]:
                    Logger.instance().info("Rule: " + str(self.__toolwrapper.name) + " -> " + self.__toolwrapper.__class__.__name__ + " started.")
                    # mkdir -p output dir: before running we need output dir
                    output_file_fields = self._ToolThread__toolwrapper.specify_output_file()
                    for out_field in output_file_fields:
                        out_file_path = self._ToolThread__toolwrapper.output_file(out_field)
                        out_dir = os.path.dirname(out_file_path)
                        try:
                            os.makedirs(out_dir)
                        except OSError as exception:
                            # The directory may already exist; any other error
                            # is a real failure and is propagated.
                            if exception.errno != errno.EEXIST:
                                raise
                    # end of mkdir -p output dir
                    self.__toolwrapper.run()
                    session_tw.commit()
                    self.__toolwrapper.set_execution_infos(start, time_unix_ms(), "EXECUTED")
                else:
                    Logger.instance().debug("Dry-run mode enabled. Execution skiped.")
                    self.__toolwrapper.set_execution_infos(status="DRY")
            else:
                Logger.instance().info("Rule: " + str(self.__toolwrapper.name) + " -> " + self.__toolwrapper.__class__.__name__ + " skiped.")
                self.__toolwrapper.set_execution_infos(start, time_unix_ms(), "ALREADY_EXECUTED")
        except Exception as e:
            session_tw.rollback()
            self.__toolwrapper.set_execution_infos(start, time_unix_ms(), "EXECUTION_ERROR")
            raise WopMarsException("Error while executing rule " + self.__toolwrapper.name +
                                   " (ToolWrapper " + self.__toolwrapper.toolwrapper + ")",
                                   "Full stack trace: \n" + str(traceback.format_exc()))
        finally:
            # todo twthread, close session
            # session_tw.close()
            pass
        self.fire_success()
示例#50
0
文件: Parser.py 项目: aitgon/wopmars
    def parse(self):
        """
        Organize the parsing of the Workflow Definition File or the Tool if only one tool is provided thanks to the
        tool command.

        Call the "read()" or the "load_one_toolwrapper" (depending on the use or not of tool command) method of the
        reader to insert in database the set of objects of the workflow.

        Then, the toolwrappers of the last execution are got back before calling the dag to build itself from the set of tools.

        The DAG is checked to actually being a Directed Acyclic Graph.

        If The "--dot" option is set, the dot and ps file are wrote here.

        :raise: WopMarsParsingException if the workflow is not a DAG.
        :return: the DAG
        """
        # Insert the workflow objects in database, either from a single
        # toolwrapper (tool command) or from the whole definition file.
        if OptionManager.instance()["tool"]:
            self.__reader.load_one_toolwrapper(OptionManager.instance()["TOOLWRAPPER"],
                                               OptionManager.instance()["--input"],
                                               OptionManager.instance()["--output"],
                                               OptionManager.instance()["--params"])
        else:
            self.__reader.read(OptionManager.instance()["--wopfile"])
        # Get back the set of toolwrappers of the workflow before executing them.
        workflow_dag = DAG(self.get_set_toolwrappers())
        if not is_directed_acyclic_graph(workflow_dag):
            # todo find out the loop to specify it in the error message
            raise WopMarsException("Error while parsing the configuration file: \n\tThe workflow is malformed:",
                                   "The specified Workflow cannot be represented as a DAG.")
        dot_path = OptionManager.instance()["--dot"]
        if dot_path:
            Logger.instance().info("Writing the dot and ps files representing the workflow at " + str(dot_path))
            workflow_dag.write_dot(dot_path)
            Logger.instance().debug("Dot and ps file wrote.")
        return workflow_dag
示例#51
0
    def result_factory(self, query, method):
        """
        Return the result of the query, using the demanded method.

        Wraps the sqlalchemy Query execution methods (all, first, one,
        one_or_none, scalar, count) so that the ReadLock of WopMars is used
        instead of the one from SQLite.

        :param query: The query object, ready to be performed.
        :param method: String signifying the method which have to be used for querying database
        :return: The result of the query.
        """
        result = None
        allowed_methods = {"all", "one", "first", "count", "one_or_none", "scalar"}
        try:
            Logger.instance().debug("Executing query on session " + str(query.session) + ": \n" + str(query) + ";")
            Logger.instance().debug("WopMarsQuery " + str(query.session) + " want the read-lock on SQLManager")
            self.__lock.acquire_read()
            Logger.instance().debug("\"" + str(query.session) + "\" has taken the read lock on SQLManager.")
            # Reject unknown methods, then dispatch to the corresponding
            # method of the sqlalchemy Query superclass (bypassing the
            # WopMars override of these methods).
            if method not in allowed_methods:
                raise WopMarsException("Error while querying the database.",
                                       "Demanded operation doesn't exist: " + str(method))
            result = getattr(super(query.__class__, query), method)()
        finally:
            # Always release the lock
            self.__lock.release()
            Logger.instance().debug("\"" + str(query.session) + "\" has released the read lock on SQLManager.")
        return result
示例#52
0
    def execute(self, session, statement, *args, **kwargs):
        """
        Execute a statement object on the given session under the write lock.

        :param session: SQLAlchemy session object.
        :param statement: SQLAlchemy statement object.
        :return: whatever session.execute returns for the statement.
        """
        session_label = str(session)
        Logger.instance().debug("SQLManager.execute(" + session_label + ", " + str(statement) + ", " +
                                    str(args) + ", " + str(kwargs) + ")")
        try:
            Logger.instance().debug(session_label + " want the write lock on SQLManager for statement \"" + str(statement) + "\"")
            self.__lock.acquire_write()
            Logger.instance().debug(session_label + " has taken the write lock on SQLManager.")
            return session.execute(statement, *args, **kwargs)
        finally:
            # Always release the lock
            self.__lock.release()
            Logger.instance().debug(session_label + " has released the write lock on SQLManager.")
示例#53
0
    def rollback(self, session):
        """
        Rollback the given session.

        The SQLManager wrap the sqlite queue for rollbacking operations on database in order to do not trigger the error due
        to the time-out operations.

        This is done thanks to a read_write_lock: sqlmanager is a synchronized singleton with synchronized methods.

        :param session: SQLAlchemy session object
        """
        session_label = str(session)
        try:
            Logger.instance().debug(session_label + " want the write lock on SQLManager.")
            self.__lock.acquire_write()
            Logger.instance().debug(session_label + " has taken the write lock on SQLManager.")
            session.rollback()
            Logger.instance().debug(session_label + " has been rollbacked.")
        finally:
            # Always release the lock
            self.__lock.release()
            Logger.instance().debug(session_label + " has released the write lock on SQLManager.")
示例#54
0
    def log(self, level, msg):
        """
        Used by the toolwrapper developer in order to have a dedicated logger.

        :param level: The level of logging you need: "debug", "info", "warning", "error"
        :type level: str
        :param msg: The actual string to log.
        :type msg: str
        :raise WopMarsException: if the level is not one of the authorized ones.
        """
        if level == "debug":
            Logger.instance().toolwrapper_debug(msg, self.toolwrapper)
        elif level == "info":
            Logger.instance().toolwrapper_info(msg, self.toolwrapper)
        elif level == "warning":
            # NOTE(review): "warning" is routed to toolwrapper_debug, which
            # looks like a copy/paste bug -- confirm whether Logger provides a
            # toolwrapper_warning method before changing the routing.
            Logger.instance().toolwrapper_debug(msg, self.toolwrapper)
        elif level == "error":
            Logger.instance().toolwrapper_error(msg, self.toolwrapper)
        else:
            # Fixed the broken error message ("The is no" -> "There is no").
            raise WopMarsException("Error in the Toolwrapper definition of method run()",
                                   "There is no logging level associated with " + str(level) + ". " +
                                   "The authorized ones are: debug, info, warning, error")
示例#55
0
    def set_args_time_and_size(self, type, dry=False):
        """
        WorkflowManager method:

        The time and the size of the files are set according to the actual time of last modification and size of the system files

        The time of the tables are set according to the time of last modification notified in the modification_table table
        If the type of IOPut is "output" and the execution is "not dry", the time in modification_table is set to the
        current time.time().

        # todo modify it to take commits into account isntead of the status of 'output' of a table

        :param type: "input" or "output"
        :type type: str
        :param dry: Say if the execution has been simulated.
        :type dry: bool
        :raise WopMarsException: a file of the given type is missing and the run is not a dry-run.
        """
        session = SQLManager.instance().get_session()
        for f in [f for f in self.files if f.type.name == type]:
            try:
                time = os_path_getmtime_ms(f.path)
                size = os.path.getsize(f.path)
            except FileNotFoundError as FE:
                # todo ask lionel: without this rollback it bugs, why? the session is empty... as if the query were blocking
                if not OptionManager.instance()["--dry-run"]:
                    session.rollback()
                    raise WopMarsException("Error during the execution of the workflow",
                                           "The " + type + " file " + str(f.path) + " of rule " + str(self.name) +
                                           " doesn't exist")
                else:
                    # in dry-run mode, input/output files might not exist
                    time = None
                    size = None
            f.used_at = time
            f.size = size
            session.add(f)
            if type == "input":
                Logger.instance().debug("Input file " + str(f) + " used.")
            elif type == "output" and dry:
                Logger.instance().debug("Output file " + str(f) + " has been loaded from previous execution.")
            elif type == "output" and not dry:
                Logger.instance().debug("Output file " + str(f) + " has been created.")
        # this commit is due to a bug that i couldn't figure out: the session empty itself between the two loops...
        # this is not good at all since it may lead to inconsistence in the database
        session.commit()

        # Tables: the used_at timestamp comes from the modification table.
        for t in [t for t in self.tables if t.type.name == type]:
            t.used_at = t.modification.time
            session.add(t)
        session.commit()
示例#56
0
    def run_queue(self):
        """
        Call start() method of all elements of the queue.

        The tools inside the queue are taken then their inputs are checked. If they are ready, the tools are started.
        If not, they are put in a buffer list of "not ready tools" or "ready but has not necessary ressources available
        tools".

        The start method is called with a dry argument, if it appears that the input of the ToolWrapper are the same
        than in a previous execution, and that the output are already ready. The dry parameter is set to True and the
        start method will only simulate the execution.

        After that, the code check for the state of the workflow and gather the informations to see if the workflow
        is finished, if it encounter an error or if it is currently running.

        :raises WopMarsException: The workflow encounter a problem and must stop.
        """

        #
        # # TODO THIS METHOD IS NOT THREAD-SAFE (maybe it is, to be checked)
        #

        # If no tools have been added to the queue:
        #  - All tools have been executed and the queue is empty, so nothing happens
        #  - There were remaining tools in the queue but they weren't ready, so they are tested again
        while not self.__queue_exec.empty():
            Logger.instance().debug("Queue size: " + str(self.__queue_exec.qsize()))
            Logger.instance().debug("Queue content: " + str(["rule: " + tt.get_toolwrapper().name + "->" +
                                                             tt.get_toolwrapper().toolwrapper for tt in self.__queue_exec.get_queue_tuple()]))
            # get the first element of the queue to execute
            thread_tw = self.__queue_exec.get()
            tw = thread_tw.get_toolwrapper()
            Logger.instance().debug("Current rule: " + tw.name + "->" + tw.toolwrapper)
            # check if the predecessors of a rule have been already executed: a rule shouldn't be executed if
            # its predecessors have not been executed yet
            if not self.all_predecessors_have_run(tw):
                Logger.instance().debug("Predecessors of rule: " + tw.name + " have not been executed yet.")
            # for running, either the inputs have to be ready or the dry-run mode is enabled
            elif tw.are_inputs_ready() or OptionManager.instance()["--dry-run"]:
                # the state of inputs (table and file) are set in the db here.
                tw.set_args_time_and_size("input")
                Logger.instance().debug("ToolWrapper ready: " + tw.toolwrapper)
                dry = False
                # if forceall option, then the tool is reexecuted anyway
                # check if the actual execution of the toolwrapper is necessary
                # every predecessors of the toolwrapper have to be executed (or simulated)
                if not OptionManager.instance()["--forceall"] and \
                        self.is_this_tool_already_done(tw) and \
                        not bool([node for node in self.__dag_to_exec.predecessors(tw) if node.status != "EXECUTED" and
                                        node.status != "ALREADY_EXECUTED"]):
                    Logger.instance().info("Rule: " + tw.name + " -> " + tw.toolwrapper +
                                           " seemed to have already" +
                                           " been runned with same" +
                                           " parameters.")
                    dry = True

                # todo twthread resource verification
                thread_tw.subscribe(self)
                self.__count_exec += 1
                # todo twthread start method
                thread_tw.set_dry(dry)
                try:
                    # be carefull here: the execution of the toolthreads is recursive meaning that calls to function may
                    # be stacked (run -> notify success -> run(next tool) -> notify success(next tool) -> etc....
                    # todo twthread start method
                    thread_tw.run()
                except Exception as e:
                    # as mentioned above, there may be recursive calls to this function, so every exception can
                    # pass here multiple times: this attribute is used for recognizing exception that have already been
                    # caught
                    if not hasattr(e, "teb_already_seen"):
                        setattr(e, "teb_already_seen", True)
                        tw.set_execution_infos(status="EXECUTION_ERROR")
                        self.__session.add(tw)
                        self.__session.commit()
                    raise e
            else:
                Logger.instance().debug("ToolWrapper not ready: rule: " + tw.name + " -> " + str(tw.toolwrapper))
                # The buffer contains the ToolWrappers that have inputs which are not ready yet.
                self.__list_queue_buffer.append(thread_tw)

        Logger.instance().debug("Buffer: " + str(["rule: " + t.get_toolwrapper().name + "->" +
                                                  t.get_toolwrapper().toolwrapper for t in self.__list_queue_buffer]))
        Logger.instance().debug("Running rules: " + str(self.__count_exec))

        # There is no more ToolWrapper that are waiting to be executed.
        # Is there some tools that are currently being executed?
        if self.__count_exec == 0:
            # Is there some tools that weren't ready?
            if len(self.__list_queue_buffer) == 0:
                # If there is no tool waiting and no tool being executed, the workflow has finished.
                finished_at = time_unix_ms()
                finished_at_strftime = datetime.datetime.fromtimestamp(finished_at/1000).strftime('%Y-%m-%d %H:%M:%S')
                Logger.instance().info("The workflow has completed. Finished at: " + finished_at_strftime)
                self.set_finishing_informations(finished_at, "FINISHED")
                SQLManager.instance().get_session().close()
                sys.exit(0)
            # only relevant in a multi-threaded environment
            elif not self.check_buffer():
                # If there is no tool being executed but there is that are waiting something, the workflow has an issue
                finished_at = time_unix_ms()
                tw_list = [t.get_toolwrapper() for t in self.__list_queue_buffer]
                if len(tw_list) > 0:
                    input_files_not_ready = tw_list[0].get_input_files_not_ready()
                    self.set_finishing_informations(finished_at, "ERROR")
                    raise WopMarsException("The workflow has failed.",
                                           "The inputs '{}' have failed for this tool '{}'".format(input_files_not_ready[0], tw_list[0].name))
示例#57
0
文件: Reader.py 项目: aitgon/wopmars
    def read(self, s_definition_file):
        """
        Reads the file given and inserts the rules of the workflow in the database.

        The definition file is supposed to be properly formed. The validation of the content of the definition is done
        during the instantiation of the tools.

        :param s_definition_file: String containing the path to the definition file.
        :type s_definition_file: str
        :raise WopMarsException: The content is not validated
        """
        self.load_definition_file(s_definition_file)

        session = SQLManager.instance().get_session()

        # The dict_workflow_definition is assumed to be well formed
        try:
            # The same execution entry for the whole workflow-related database entries.
            execution = Execution(started_at=time_unix_ms())
            # get the types database entries that should have been created previously
            input_entry = session.query(Type).filter(Type.name == "input").one()
            output_entry = session.query(Type).filter(Type.name == "output").one()
            set_wrapper = set()
            # Encounter a rule block
            for rule in self.__dict_workflow_definition:
                str_wrapper_name = None
                # the name of the rule is extracted after the "rule" keyword. There shouldn't be a ":" but it costs nothing.
                str_rule_name = rule.split()[-1].strip(":")
                Logger.instance().debug("Encounter rule " + str_rule_name + ": \n" +
                                        str(DictUtils.pretty_repr(self.__dict_workflow_definition[rule])))
                # The dict of "input"s, "output"s and "params" is re-initialized for each wrapper
                dict_dict_dict_elm = dict(dict_input={"file": {}, "table": {}},
                                          dict_params={},
                                          dict_output={"file": {}, "table": {}})
                for key_second_step in self.__dict_workflow_definition[rule]:
                    # key_second_step is supposed to be "tool", "input", "output" or "params"
                    if isinstance(self.__dict_workflow_definition[rule][key_second_step], dict):
                        # if it is a dict, then inputs, outputs or params are coming
                        for key_third_step in self.__dict_workflow_definition[rule][key_second_step]:
                            # todo tabling modification of the indentation levels + appearance of tables in file
                            if key_second_step == "params":
                                key = key_third_step
                                value = self.__dict_workflow_definition[rule][key_second_step][key_third_step]
                                obj_created = Option(name=key,
                                                     value=value)
                                dict_dict_dict_elm["dict_params"][key] = obj_created
                            else:
                                for key_fourth_step in self.__dict_workflow_definition[rule][key_second_step][key_third_step]:
                                    obj_created = None
                                    if key_third_step == "file":
                                        key = key_fourth_step
                                        str_path_to_file = os.path.join(OptionManager.instance()["--directory"],
                                                                        self.__dict_workflow_definition[rule][
                                                                            key_second_step][
                                                                            key_third_step][
                                                                            key])
                                        obj_created = IOFilePut(name=key,
                                                                path=os.path.abspath(str_path_to_file))

                                    elif key_third_step == "table":
                                        key = key_fourth_step
                                        modelname = self.__dict_workflow_definition[rule][
                                            key_second_step][
                                            key_third_step][
                                            key]
                                        obj_created = IODbPut(model=modelname, tablename=key)

                                    # all elements of the current rule block are stored in there
                                    # key_second_step is input or output here
                                    dict_dict_dict_elm["dict_" + key_second_step][key_third_step][key] = obj_created
                                    Logger.instance().debug("Object " + key_second_step + " " + key_third_step + ": " +
                                                            key + " created.")
                    else:
                        # if the step is not a dict, then it is supposed to be the "tool" line
                        str_wrapper_name = self.__dict_workflow_definition[rule][key_second_step]
                # At this point, "dict_dict_dict_elm" is like this:
                # {
                #     'dict_params': {
                #         'option1': Option('option1', 'valueofoption1')
                #     },
                #     'dict_input': {
                #         'file' : {
                #             'input1': IOFilePut('input1', 'path/to/input1')
                #         }
                #         'table': {
                #             'table1': IODbPut('table1', 'package.of.table1')
                #         }
                #     },
                # }

                # Instantiate the referred class and add it to the set of objects
                wrapper_entry = self.create_toolwrapper_entry(str_rule_name, str_wrapper_name, dict_dict_dict_elm, input_entry, output_entry)
                # Associating a toolwrapper to an execution
                wrapper_entry.execution = execution
                set_wrapper.add(wrapper_entry)
                Logger.instance().debug("Object toolwrapper: " + str_wrapper_name + " created.")
                # commit/rollback trick to clean the session - SQLAlchemy bug suspected
                session.commit()
                session.rollback()
                # todo set_table_properties outside the rules loop to take into account all the tables at once
                # (error if one tool has a foreign key referring to a table that is not in its I/O put
            IODbPut.set_tables_properties(IODbPut.get_execution_tables())
            session.commit()
            session.rollback()
            # This create_all will create all tables that have been found in the toolwrapper
            # if not SQLManager.instance().d_database_config['db_connection'] == 'postgresql':
            # TODO: this function is not creating the triggers after the table in postgresql so I switched it off
            IODbPut.create_triggers()
            SQLManager.instance().create_all()
            session.add_all(set_wrapper)
            # save all operations done so far.
            session.commit()
            for tw in set_wrapper:
                tw.is_content_respected()

        except NoResultFound as e:
            session.rollback()
            raise WopMarsException("Error while parsing the configuration file. The database has not been setUp Correctly.",
                                   str(e))
示例#58
0
文件: Reader.py 项目: aitgon/wopmars
    def load_definition_file(self, s_definition_file):
        """
        Open the definition file and load its content in a dictionary thanks to the ``yaml`` library. ``yaml`` can
        raise an exception if the yaml specifications are not respected or if there are duplicates at the same level of
        hierarchy in the definition file. If so, the exception is caught then wrapped into a ``WopMarsException``.

        The check of the grammar of the definition file is done during this step but no tests are performed regarding
        the actual content of the definition file.

        :param s_definition_file: Path to the definition file
        :type s_definition_file: str
        :raises WopMarsException: The yaml specifications are not respected
        """
        # Tests about grammar and syntax are performed here (file's existence is also tested here)
        try:
            with open(s_definition_file, 'r') as def_file:
                s_def_file_content = def_file.read()
            try:
                # The workflow definition file is loaded as-is in memory by the pyyaml library
                Logger.instance().info("Reading the Wopfile: " + str(s_definition_file))
                # Replace jinja2 variables with environment variable values
                #s_def_file_content = jinja2.Environment().from_string(s_def_file_content).render(os.environ)
                # Parse the file to find duplicate rule names (it is a double check with the following step)
                Reader.check_duplicate_rules(s_def_file_content)
                # Raise an exception if duplicate keys are found on the same document hierarchy level.
                # NOTE: this registers the constructor globally on pyyaml's default Loader, so it
                # affects every subsequent yaml.load call made by this process.
                yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, Reader.no_duplicates_constructor)
                # The whole content of the definition file is loaded in this dict.
                # yaml.load returns None if there is no content in the String.
                # SECURITY: yaml.load without an explicit safe Loader can instantiate arbitrary
                # python objects from the file content -- only feed it trusted Wopfiles.
                self.__dict_workflow_definition = yaml.load(s_def_file_content) or {}
                if self.__dict_workflow_definition == {}:
                    Logger.instance().warning("The workflow definition file is empty")
                Logger.instance().debug("\n" + DictUtils.pretty_repr(self.__dict_workflow_definition))
                Logger.instance().debug("Read complete.")
                Logger.instance().info("Checking whether the file is well formed...")
                # raise an exception if there is a problem with the grammar
                self.is_grammar_respected()
                Logger.instance().debug("File well formed.")
            # ConstructorError is a subclass of yaml.YAMLError, so it must be caught FIRST;
            # otherwise this handler would be unreachable and duplicate-key errors would be
            # reported with the generic "YAML specification" message instead.
            except ConstructorError as CE:
                raise WopMarsException("Error while parsing the configuration file: \n\t",
                                       str(CE))
            # YAMLError is thrown if the YAML specifications are not respected by the definition file
            except yaml.YAMLError as exc:
                raise WopMarsException("Error while parsing the configuration file: \n\t"
                                       "The YAML specification is not respected:", str(exc))
        except FileNotFoundError:
            raise WopMarsException("Error while parsing the configuration file: \n\tInput error:",
                                   "The specified file at " + s_definition_file + " doesn't exist.")
示例#59
0
    def are_inputs_ready(self):
        """
        Check whether every input file and input table of this tool is ready.

        Sets the internal state to NOT_READY and returns False as soon as one
        input is found not ready; otherwise sets it to READY.

        :return: bool - True if inputs are ready.
        """
        file_inputs = [f for f in self.files if f.type.name == "input"]
        Logger.instance().debug("Inputs files of " + str(self.__class__.__name__) + ": " + str([i.name for i in file_inputs]))
        for file_input in file_inputs:
            if file_input.is_ready():
                Logger.instance().debug("Input: " + str(file_input.name) + " is ready.")
                continue
            Logger.instance().debug("Input: " + str(file_input.name) + " is not ready.")
            self.__state = ToolWrapper.NOT_READY
            return False

        table_inputs = [t for t in self.tables if t.type.name == "input"]
        Logger.instance().debug("Inputs tables of " + str(self.__class__.__name__) + ": " + str([i.tablename for i in table_inputs]))
        for table_input in table_inputs:
            if table_input.is_ready():
                Logger.instance().debug("Input: " + str(table_input.tablename) + " is ready.")
                continue
            Logger.instance().debug("Input: " + str(table_input.tablename) + " is not ready.")
            self.__state = ToolWrapper.NOT_READY
            return False

        # Every file and every table input passed the readiness check.
        self.__state = ToolWrapper.READY
        return True
示例#60
0
    def run(argv):
        """
        Entry-point of the program.

        Parses the command line with docopt, validates the option values with the
        schema library, handles the "example" shortcuts, then launches the
        workflow manager. Exits the process with a non-zero status on any error.

        :param argv: Command-line argument vector (argv[0] is the program name).
        """

        # if the command line is malformed, docopt interrupt the software.
        try:
            if not argv[1:]:  # If no arguments, run the help
                argv.append('-h')
            OptionManager.instance().update(docopt(__doc__, argv=argv[1:]))
        except DocoptExit as SE:
            print("Bad argument in the command line: \n\t" + " ".join(argv) + "\n" + str(SE))
            sys.exit(2)
        try:
            # Each option value is validated against its expected shape/type.
            schema_option = Schema({
                '--wopfile': Or("Wopfile", str),
                '--database': Use(PathFinder.check_database_valid_url),
                '-v': Or(0, And(int, lambda n: 1 <= n <= 2)),
                '--dot': Or(None, And(Use(PathFinder.check_valid_path), Use(PathFinder.check_pygraphviz))),
                "--log": Use(PathFinder.check_valid_path),
                '--printtools': Use(bool),
                "--sourcerule": Or(None, str),
                "--targetrule": Or(None, str),
                "--forceall": Use(bool),
                "--dry-run": Use(bool),
                "--directory": Use(PathFinder.create_workingdir),
                "--input": Use(DictUtils.str_to_dict),
                "--output": Use(DictUtils.str_to_dict),
                "--params": Use(DictUtils.str_to_dict),
                "TOOLWRAPPER": Or(None, Use(PathFinder.is_in_python_path)),
                "tool": Use(bool),
                "example": Use(bool),
                "example_snp": Use(bool),
                "--clear-history": Use(bool),
                "--toolwrapper-log": Use(bool)
            })
            # The option values are validated using schema library
            OptionManager.instance().validate(schema_option)
            os.chdir(OptionManager.instance()["--directory"])
        except SchemaError as schema_msg:
            Logger.instance().debug("\nCommand line Args:" + str(OptionManager.instance()))
            # regex for the different possible error messages.
            match_open_def = re.match(r"^open\('(.[^\)]+)'\)", str(schema_msg))
            match_dot_def = re.match(r"^check_valid_path\(('.[^\)]+')\)", str(schema_msg))
            match_wrong_key = re.match(r"^Wrong keys ('.[^\)]+')", str(schema_msg))
            match_pygraphviz = re.match(r".*dot.*", str(schema_msg))
            # Check the different regex..
            if match_open_def:
                Logger.instance().error("The file " + match_open_def.group(1) + " cannot be opened. It may not exist.")
            elif match_dot_def:
                Logger.instance().error("The path " + match_dot_def.group(1) + " is not valid.")
            elif match_wrong_key:
                # Normally never reach
                Logger.instance().error("The option key " + match_wrong_key.group(1) + " is not known.")
            elif match_pygraphviz:
                Logger.instance().error("The dot file path is not valid or the pygraphviz module is not installed. In the second case, install wopmars with pygraphviz: pip install wopmars[pygraphviz]")
            else:
                # Normally never reach
                Logger.instance().error("An unknown error has occured. Message: " + str(schema_msg))
            sys.exit(2)

        Logger.instance().debug("\nCommand line Args:" + str(OptionManager.instance()))

        # NOTE(review): both example builders exit with status 1 even on success --
        # confirm whether status 0 was intended.
        if OptionManager.instance()["example"]:
            ExampleBuilder().build()
            sys.exit(1)

        if OptionManager.instance()["example_snp"]:
            ExampleBuilder().build_snp()
            sys.exit(1)


        wm = WorkflowManager()
        try:
            wm.run()
        except WopMarsException as WE:
            Logger.instance().error(str(WE))
            session = SQLManager.instance().get_session()
            try:
                finished_at = time_unix_ms()
                Logger.instance().error("The workflow has encountered an error at: " + str(finished_at))
                wm.set_finishing_informations(finished_at, "ERROR")
            except AttributeError:
                session.rollback()
                Logger.instance().error("The execution has not even begun. No informations will be stored in the database.")
            except Exception as e:
                Logger.instance().error("An error occured during the rollback of the changement of the database which can be now unstable:" +
                                        str(e))
            sys.exit(1)