Example #1
    def __init__(self, workdir, manager, pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        self.workdir = os.path.abspath(workdir)
        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        # Directories with (input|output|temporary) data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

        self.pickle_protocol = int(pickle_protocol)
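
A minimal usage sketch for this constructor; `AbinitFlow` is the class these snippets belong to (defined in the later examples) and `manager` stands in for an already configured `TaskManager` instance that is not built here:

import tempfile

# `manager` is assumed to be a fully configured TaskManager instance.
workdir = tempfile.mkdtemp(prefix="abiflow_")
flow = AbinitFlow(workdir=workdir, manager=manager)

# The constructor only assembles absolute paths for indata/outdata/tmpdata;
# nothing is created on disk until build() is called.
print(flow.workdir, flow.creation_date)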
Example #2
    def set_workdir(self, workdir, chroot=False):
        """Set the working directory. Cannot be set more than once unless chroot is True"""
        if not chroot and hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" % (self.workdir,  workdir))

        # Directories with (input|output|temporary) data.
        self.workdir = os.path.abspath(workdir)
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))
Example #3
    def set_workdir(self, workdir):
        """Set the working directory. Cannot be set more than once."""

        if hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" % (self.workdir,  workdir))

        self.workdir = os.path.abspath(workdir)
                                                                       
        # Directories with (input|output|temporary) data.
        # The workflow will use these directories to connect 
        # itself to other workflows and/or to produce new data 
        # that will be used by its children.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))
Example #4
    def set_workdir(self, workdir):
        """Set the working directory."""

        self.workdir = os.path.abspath(workdir)

        # Files required for the execution.
        self.input_file = File(os.path.join(self.workdir, "run.abi"))
        self.output_file = File(os.path.join(self.workdir, "run.abo"))
        self.files_file = File(os.path.join(self.workdir, "run.files"))
        self.log_file = File(os.path.join(self.workdir, "run.log"))
        self.stderr_file = File(os.path.join(self.workdir, "run.err"))

        # Directories with input|output|temporary data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))
Example #5
    def set_workdir(self, workdir, chroot=False):
        """
        Set the working directory. Cannot be set more than once unless chroot is True
        """
        if not chroot and hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" %
                             (self.workdir, workdir))

        # Directories with (input|output|temporary) data.
        self.workdir = os.path.abspath(workdir)
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))
Example #6
    def set_workdir(self, workdir):
        """Set the working directory."""

        self.workdir = os.path.abspath(workdir)

        # Files required for the execution.
        self.input_file = File(os.path.join(self.workdir, "run.abi"))
        self.output_file = File(os.path.join(self.workdir, "run.abo"))
        self.files_file = File(os.path.join(self.workdir, "run.files"))
        self.log_file = File(os.path.join(self.workdir, "run.log"))
        self.stderr_file = File(os.path.join(self.workdir, "run.err"))

        # Directories with input|output|temporary data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))
Example #7
    def __init__(self,
                 workdir,
                 manager,
                 auto_restart=False,
                 pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            auto_restart:
                True if unconverged calculations should be restarted automatically.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        super(AbinitFlow, self).__init__()

        self.workdir = os.path.abspath(workdir)
        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()
        self.auto_restart = auto_restart

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        # Directories with (input|output|temporary) data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

        self.pickle_protocol = int(pickle_protocol)
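
A hedged construction sketch for this variant; `manager` is again a placeholder for an existing `TaskManager`, and only the auto-restart flag is exercised here:

# Build a flow that will try to restart unconverged tasks automatically.
flow = AbinitFlow("flow_autorestart", manager=manager, auto_restart=True)
assert flow.auto_restart is True and flow.pickle_protocol == -1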
Example #8
class AbinitFlow(collections.Iterable):
    """
    This object is a container of workflows. Its main task is managing the
    possible inter-dependencies among the workflows and the creation of
    dynamic workflows that are generated by callbacks registered by the user.

    .. attributes:

        creation_date:
            String with the creation_date

        pickle_protocol: 
            Protocol for Pickle database (default: -1 i.e. latest protocol)
    """
    VERSION = "0.1"

    PICKLE_FNAME = "__AbinitFlow__.pickle"

    def __init__(self, workdir, manager, pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        self.workdir = os.path.abspath(workdir)
        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        # Directories with (input|output|temporary) data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

        self.pickle_protocol = int(pickle_protocol)

    def __repr__(self):
        return "<%s at %s, workdir=%s>" % (self.__class__.__name__, id(self), self.workdir)

    def __str__(self):
        return "<%s, workdir=%s>" % (self.__class__.__name__, self.workdir)

    def __len__(self):
        return len(self.works)

    def __iter__(self):
        return self.works.__iter__()

    def __getitem__(self, slice):
        return self.works[slice]

    #def __hash__(self):
    #    return hash(self.workdir)

    #def __eq__(self, other):
    #    if other is None or not isinstance(other, Flow):
    #        return False
    #    return  self.workdir == other.workdir

    #def __ne__(self, other):
    #    return not self == other

    @property
    def works(self):
        """List of `Workflow` objects contained in self.."""
        return self._works

    #@property
    #def completed(self):
    #    """True if all the tasks of the flow have reached S_OK."""
    #    return all(task.status == task.S_OK for task in self.flat_tasks())

    #def iflat_tasks(self):
    #    for work in self:
    #        for task in work:
    #            yield task

    @property
    def ncpus_reserved(self):
        """
        Returns the number of CPUs reserved in this moment.
        A CPU is reserved if it is not yet running but
        we have submitted the task to the queue manager.
        """
        return sum(work.ncpus_reverved for work in self)

    @property
    def ncpus_allocated(self):
        """
        Returns the number of CPUs allocated in this moment.
        A CPU is allocated if it's running a task or if we have
        submitted a task to the queue manager but the job is still pending.
        """
        return sum(work.ncpus_allocated for work in self)

    @property
    def ncpus_inuse(self):
        """
        Returns the number of CPUs used in this moment.
        A CPU is used if there's a job that is running on it.
        """
        return sum(work.ncpus_inuse for work in self)

    def used_ids(self):
        """
        Returns a set with all the ids used so far to identify `Task` and `Workflow`.
        """
        ids = []
        for work in self:
            ids.append(work.node_id)
            for task in work:
                ids.append(task.node_id)

        used_ids = set(ids)
        assert len(used_ids) == len(ids)

        return used_ids

    def generate_new_nodeid(self):
        """Returns an unused node identifier."""
        used_ids = self.used_ids()

        for nid in itertools.count():
            if nid not in used_ids:
                return nid

    def check_status(self):
        """Check the status of the workflows in self."""
        for work in self:
            work.check_status()

        # Test whether some task must be restarted.
        #num_restarts = 0
        #for work in self:
        #    for task in work:
        #        if task.not_converged():
        #            retcode = task.restart_if_needed(self):
        #            if retcode == 0: num_restarts += 1
        #if num_restarts:
        #    self.pickle_dump()

    def build(self, *args, **kwargs):
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        for work in self:
            work.build(*args, **kwargs)

    def build_and_pickle_dump(self):
        """
        Build dirs and files of the `Flow` and save the object in pickle format.

        Returns:
            0 if success
        """
        self.build()
        return self.pickle_dump()

    def pickle_dump(self):
        """
        Save the status of the object in pickle format.

        Returns:
            0 if success
        """
        protocol = self.pickle_protocol
        filepath = os.path.join(self.workdir, self.PICKLE_FNAME)

        with FileLock(filepath) as lock:
            with open(filepath, mode="w" if protocol == 0 else "wb") as fh:
                pickle.dump(self, fh, protocol=protocol)

        # Atomic transaction.
        #filepath_new = filepath + ".new"
        #filepath_save = filepath + ".save"
        #shutil.copyfile(filepath, filepath_save)

        #try:
        #    with open(filepath_new, mode="w" if protocol == 0 else "wb") as fh:
        #        pickle.dump(self, fh, protocol=protocol)

        #    os.rename(filepath_new, filepath)
        #except IOError:
        #    os.rename(filepath_save, filepath)
        #finally:
        #    try
        #        os.remove(filepath_save)
        #    except:
        #        pass
        #    try
        #        os.remove(filepath_new)
        #    except:
        #        pass
        return 0

    @classmethod
    def pickle_load(cls, filepath):
        """
        Load the object from a pickle file and perform initial setup.
        """
        with FileLock(filepath) as lock:
            with open(filepath, "rb") as fh:
                flow = pickle.load(fh)

        flow.connect_signals()

        # Recompute the status of each task since tasks that
        # have been submitted previously might be completed.
        flow.check_status()
        return flow

    def register_task(self, input, deps=None, manager=None, task_class=None):
        """
        Utility function that generates a `Workflow` made of a single task

        Args:
            input:
                Abinit input file, `Strategy` object, or `Task` object.
            deps:
                List of `Dependency` objects specifying the dependency of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            The generated `Task`.
        """
        work = Workflow(manager=manager)
        task = work.register(input, deps=deps, task_class=task_class)

        self.register_work(work)
        return task

    def register_work(self, work, deps=None, manager=None, workdir=None):
        """
        Register a new `Workflow` and add it to the internal list, 
        taking into account possible dependencies.

        Args:
            work:
                `Workflow` object.
            deps:
                List of `Dependency` objects specifying the dependency of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            workdir:
                The name of the directory used for the `Workflow`.

        Returns:   
            The workflow.
        """
        # Directory of the workflow.
        if workdir is None:
            work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
        else:
            work_workdir = os.path.join(self.workdir, os.path.basename(workdir))

        # Make a deepcopy since manager is mutable and we might change it at run-time.
        manager = self.manager.deepcopy() if manager is None else manager.deepcopy()

        work.set_workdir(work_workdir)
        work.set_manager(manager)

        self.works.append(work)

        if deps:
            deps = [Dependency(node, exts) for node, exts in deps.items()]
            work.add_deps(deps)

        return work

    def register_cbk(self, cbk, cbk_data, deps, work_class, manager=None):
        """
        Registers a callback function that will generate the Task of the Workflow.

        Args:
            cbk:
                Callback function.
            cbk_data:
                Additional data passed to the callback function.
            deps:
                List of `Dependency` objects specifying the dependency of the workflow.
            work_class:
                `Workflow` class to instantiate.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the `Flow`.
                                                                                                            
        Returns:   
            The `Workflow` that will be finalized by the callback.
        """
        # TODO: pass a workflow factory instead of a class
        # Directory of the workflow.
        work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
                                                                                                            
        # Make a deepcopy since manager is mutable and we might change it at run-time.
        manager = self.manager.deepcopy() if manager is None else manager.deepcopy()
                                                                                                            
        # Create an empty workflow and register the callback
        work = work_class(workdir=work_workdir, manager=manager)
        
        self._works.append(work)
                                                                                                            
        deps = [Dependency(node, exts) for node, exts in deps.items()]
        if not deps:
            raise ValueError("A callback must have deps!")

        work.add_deps(deps)

        # Wrap the callable in a Callback object and save 
        # useful info such as the index of the workflow and the callback data.
        cbk = Callback(cbk, work, deps=deps, cbk_data=cbk_data)
                                                                                                            
        self._callbacks.append(cbk)
                                                                                                            
        return work

    def allocate(self):
        for work in self:
            work.allocate()

        return self

    def show_dependencies(self):
        for work in self:
            work.show_intrawork_deps()

    def on_dep_ok(self, signal, sender):
        # TODO
        # Replace this callback with dynamic dispatch
        # on_all_S_OK for workflow
        # on_S_OK for task
        print("on_dep_ok with sender %s, signal %s" % (str(sender), signal))

        for i, cbk in enumerate(self._callbacks):

            if not cbk.handle_sender(sender):
                print("Do not handle")
                continue

            if not cbk.can_execute():
                print("cannot execute")
                continue 

            # Execute the callback to generate the workflow.
            print("about to build new workflow")
            #empty_work = self._works[cbk.w_idx]

            # TODO better treatment of ids
            # Make sure the new workflow has the same id as the previous one.
            #new_work_idx = cbk.w_idx
            work = cbk(flow=self)
            work.add_deps(cbk.deps)

            # Disable the callback.
            cbk.disable()

            # Update the database.
            self.pickle_dump()

    #def finalize(self):
    #    """This method is called when the flow is completed."""

    @property
    def all_ok(self):
        return all(work.all_ok for work in self)

    def connect_signals(self):
        """
        Connect the signals within the workflow.
        self is responsible for catching the important signals raised by
        its tasks and for raising new signals when particular conditions occur.
        """
        # Connect the signals inside each Workflow.
        for work in self:
            work.connect_signals()

        # Observe the nodes that must reach S_OK in order to call the callbacks.
        for cbk in self._callbacks:
            for dep in cbk.deps:
                print("connecting %s \nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
                dispatcher.connect(self.on_dep_ok, signal=dep.node.S_OK, sender=dep.node, weak=False)

        #self.show_receivers()

    def show_receivers(self, sender=dispatcher.Any, signal=dispatcher.Any):
        print("*** live receivers ***")
        for rec in dispatcher.liveReceivers(dispatcher.getReceivers(sender, signal)):
            print("receiver -->", rec)
        print("*** end live receivers ***")
Example #9
class AbinitFlow(Node):
#class AbinitFlow(collections.Iterable):
    """
    This object is a container of workflows. Its main task is managing the
    possible inter-dependencies among the workflows and the creation of
    dynamic workflows that are generated by callbacks registered by the user.

    .. attributes:

        creation_date:
            String with the creation_date

        pickle_protocol: 
            Protocol for Pickle database (default: -1 i.e. latest protocol)
    """
    VERSION = "0.1"

    PICKLE_FNAME = "__AbinitFlow__.pickle"

    def __init__(self, workdir, manager, auto_restart=False, pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            auto_restart:
                True if unconverged calculations should be restarted automatically.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        super(AbinitFlow, self).__init__()

        self.workdir = os.path.abspath(workdir)
        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()
        self.auto_restart = auto_restart

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        # Directories with (input|output|temporary) data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

        self.pickle_protocol = int(pickle_protocol)

        # Signal slots: a dictionary with the list 
        # of callbacks indexed by node_id and SIGNAL_TYPE.
        # When the node changes its status, it broadcast a signal.
        # The flow is listening to all the nodes of the calculation
        # [node_id][SIGNAL] = list_of_signal_handlers
        #self._sig_slots =  slots = {}
        #for work in self:
        #    slots[work] = {s: [] for s in work.S_ALL}

        #for task in self.iflat_tasks():
        #    slots[task] = {s: [] for s in work.S_ALL}

    def __len__(self):
        return len(self.works)

    def __iter__(self):
        return self.works.__iter__()

    def __getitem__(self, slice):
        return self.works[slice]

    @property
    def works(self):
        """List of `Workflow` objects contained in self.."""
        return self._works

    @property
    def all_ok(self):
        """True if all the tasks in workflows have reached S_OK."""
        return all(work.all_ok for work in self)

    #@property
    #def completed(self):
    #    """True if all the tasks of the flow have reached S_OK."""
    #    return all(task.status == task.S_OK for task in self.iflat_tasks())

    def iflat_tasks_wti(self, status=None, op="="):
        """
        Returns:
            (task, work_index, task_index)
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=True)

    def iflat_tasks(self, status=None, op="="):
        """
        Returns:
            task
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=False)

    def _iflat_tasks_wti(self, status=None, op="=", with_wti=True):
        """
        Generator that produces a flat sequence of tasks.
        If status is not None, only the tasks with the specified status are selected.

        Returns:
            (task, work_index, task_index) if with_wti is True else task
        """
        if status is None:
            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if with_wti:
                        yield task, wi, ti
                    else:
                        yield task

        else:
            # Get the operator from the string.
            import operator
            op = {
                "=": operator.eq,
                "!=": operator.ne,
                ">": operator.gt,
                ">=": operator.ge,
                "<": operator.lt,
                "<=": operator.le,
            }[op]

            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if op(task.status, status):
                        if with_wti:
                            yield task, wi, ti
                        else:
                            yield task

    @property
    def ncpus_reserved(self):
        """
        Returns the number of CPUs reserved in this moment.
        A CPU is reserved if it is not yet running but
        we have submitted the task to the queue manager.
        """
        return sum(work.ncpus_reverved for work in self)

    @property
    def ncpus_allocated(self):
        """
        Returns the number of CPUs allocated in this moment.
        A CPU is allocated if it's running a task or if we have
        submitted a task to the queue manager but the job is still pending.
        """
        return sum(work.ncpus_allocated for work in self)

    @property
    def ncpus_inuse(self):
        """
        Returns the number of CPUs used in this moment.
        A CPU is used if there's a job that is running on it.
        """
        return sum(work.ncpus_inuse for work in self)

    def check_status(self):
        """Check the status of the workflows in self."""
        for work in self:
            work.check_status()

        # Test whether some task should be restarted.
        if self.auto_restart:
            num_restarts = 0
            for task in self.iflat_tasks(status=Task.S_UNCONVERGED):
                msg = "AbinitFlow will try restart task %s" % task
                print(msg)
                logger.info(msg)
                retcode = task.restart_if_needed()
                if retcode == 0: 
                    num_restarts += 1

            if num_restarts:
                print("num_restarts done successfully: ", num_restarts)
                self.pickle_dump()

    def show_status(self, stream=sys.stdout):
        """
        Report the status of the workflows and the status 
        of the different tasks on the specified stream.
        """
        for i, work in enumerate(self):
            print(80*"=")
            print("Workflow #%d: %s, Finalized=%s\n" % (i, work, work.finalized) )

            table = [[
                     "Task", "Status", "Queue_id", 
                     "Errors", "Warnings", "Comments", 
                     "MPI", "OMP", 
                     "num_restarts", "max_restarts", "Task Class"
                     ]]

            for task in work:
                task_name = os.path.basename(task.name)

                # Parse the events in the main output.
                report = task.get_event_report()

                events = 3 * ["N/A"]
                if report is not None:
                    events = [str(report.num_errors), str(report.num_warnings), str(report.num_comments)]

                cpu_info = [str(task.mpi_ncpus), str(task.omp_ncpus)]
                task_info = [str(task.num_restarts), str(task.max_num_restarts), task.__class__.__name__]

                table.append(
                    [task_name, str(task.status), str(task.queue_id)] +
                    events +
                    cpu_info +
                    task_info
                    )

            pprint_table(table, out=stream)

    def build(self, *args, **kwargs):
        """Make directories and files of the `Flow`."""
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        for work in self:
            work.build(*args, **kwargs)

    def build_and_pickle_dump(self):
        """
        Build dirs and files of the `Flow` and save the object in pickle format.

        Returns:
            0 if success
        """
        self.build()
        return self.pickle_dump()

    def pickle_dump(self):
        """
        Save the status of the object in pickle format.

        Returns:
            0 if success
        """
        protocol = self.pickle_protocol
        filepath = os.path.join(self.workdir, self.PICKLE_FNAME)

        with FileLock(filepath) as lock:
            with open(filepath, mode="w" if protocol == 0 else "wb") as fh:
                pickle.dump(self, fh, protocol=protocol)

        # Atomic transaction.
        #filepath_new = filepath + ".new"
        #filepath_save = filepath + ".save"
        #shutil.copyfile(filepath, filepath_save)

        #try:
        #    with open(filepath_new, mode="w" if protocol == 0 else "wb") as fh:
        #        pickle.dump(self, fh, protocol=protocol)

        #    os.rename(filepath_new, filepath)
        #except IOError:
        #    os.rename(filepath_save, filepath)
        #finally:
        #    try
        #        os.remove(filepath_save)
        #    except:
        #        pass
        #    try
        #        os.remove(filepath_new)
        #    except:
        #        pass
        return 0

    @classmethod
    def pickle_load(cls, filepath, disable_signals=False):
        """
        Loads the object from a pickle file and performs initial setup.

        Args:
            filepath:
                Filename or directory name. If filepath is a directory, we
                scan the directory tree starting from filepath and we 
                read the first pickle database.
            disable_signals:
                If True, the nodes of the flow are not connected by signals.
                This option is usually used when we want to read a flow 
                in read-only mode and we want to avoid any possible side effect.
        """
        if os.path.isdir(filepath):
            # Walk through each directory inside path and find the pickle database.
            for dirpath, dirnames, filenames in os.walk(filepath):
                fnames = [f for f in filenames if f == cls.PICKLE_FNAME]
                if fnames:
                    assert len(fnames) == 1
                    filepath = os.path.join(dirpath, fnames[0])
                    break
            else:
                err_msg = "Cannot find %s inside directory %s" % (cls.PICKLE_FNAME, path)
                raise ValueError(err_msg)

        with FileLock(filepath) as lock:
            with open(filepath, "rb") as fh:
                flow = pickle.load(fh)

        if not disable_signals:
            flow.connect_signals()

        # Recompute the status of each task since tasks that
        # have been submitted previously might be completed.
        flow.check_status()
        return flow

    def register_task(self, input, deps=None, manager=None, task_class=None):
        """
        Utility function that generates a `Workflow` made of a single task

        Args:
            input:
                Abinit input file, `Strategy` object, or `Task` object.
            deps:
                List of `Dependency` objects specifying the dependency of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            The generated `Task`.
        """
        work = Workflow(manager=manager)
        task = work.register(input, deps=deps, task_class=task_class)
        self.register_work(work)

        return task

    def register_work(self, work, deps=None, manager=None, workdir=None):
        """
        Register a new `Workflow` and add it to the internal list, 
        taking into account possible dependencies.

        Args:
            work:
                `Workflow` object.
            deps:
                List of `Dependency` objects specifying the dependency of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            workdir:
                The name of the directory used for the `Workflow`.

        Returns:   
            The registered `Workflow`.
        """
        # Directory of the workflow.
        if workdir is None:
            work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
        else:
            work_workdir = os.path.join(self.workdir, os.path.basename(workdir))

        work.set_workdir(work_workdir)

        if manager is not None:
            work.set_manager(manager)

        self.works.append(work)

        if deps:
            deps = [Dependency(node, exts) for node, exts in deps.items()]
            work.add_deps(deps)

        return work

    def register_cbk(self, cbk, cbk_data, deps, work_class, manager=None):
        """
        Registers a callback function that will generate the `Task` of the `Workflow`.

        Args:
            cbk:
                Callback function.
            cbk_data:
                Additional data passed to the callback function.
            deps:
                List of `Dependency` objects specifying the dependency of the workflow.
            work_class:
                `Workflow` class to instantiate.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the `Flow`.
                                                                                                            
        Returns:   
            The `Workflow` that will be finalized by the callback.
        """
        # TODO: pass a workflow factory instead of a class
        # Directory of the workflow.
        work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))

        # Create an empty workflow and register the callback
        work = work_class(workdir=work_workdir, manager=manager)
        
        self._works.append(work)
                                                                                                            
        deps = [Dependency(node, exts) for node, exts in deps.items()]
        if not deps:
            raise ValueError("A callback must have deps!")

        work.add_deps(deps)

        # Wrap the callable in a Callback object and save 
        # useful info such as the index of the workflow and the callback data.
        cbk = Callback(cbk, work, deps=deps, cbk_data=cbk_data)
                                                                                                            
        self._callbacks.append(cbk)
                                                                                                            
        return work

    def allocate(self, manager=None):
        """
        Allocate the `AbinitFlow` i.e. assign the `workdir` and (optionally) 
        the `TaskManager` to the different tasks in the Flow.
        """
        for work in self:
            work.allocate(manager=self.manager)
            work.set_flow(self)

        for task in self.iflat_tasks():
            task.set_flow(self)

        return self

    def show_dependencies(self):
        for work in self:
            work.show_intrawork_deps()

    def on_dep_ok(self, signal, sender):
        # TODO
        # Replace this callback with dynamic dispatch
        # on_all_S_OK for workflow
        # on_S_OK for task
        print("on_dep_ok with sender %s, signal %s" % (str(sender), signal))

        for i, cbk in enumerate(self._callbacks):

            if not cbk.handle_sender(sender):
                print("Do not handle")
                continue

            if not cbk.can_execute():
                print("cannot execute")
                continue 

            # Execute the callback to generate the workflow.
            print("about to build new workflow")
            #empty_work = self._works[cbk.w_idx]

            # TODO better treatment of ids
            # Make sure the new workflow has the same id as the previous one.
            #new_work_idx = cbk.w_idx
            work = cbk(flow=self)
            work.add_deps(cbk.deps)

            # Disable the callback.
            cbk.disable()

            # Update the database.
            self.pickle_dump()

    #def finalize(self):
    #    """This method is called when the flow is completed."""

    def connect_signals(self):
        """
        Connect the signals within the workflow.
        self is responsible for catching the important signals raised by
        its tasks and for raising new signals when particular conditions occur.
        """
        # Connect the signals inside each Workflow.
        for work in self:
            work.connect_signals()

        # Observe the nodes that must reach S_OK in order to call the callbacks.
        for cbk in self._callbacks:
            for dep in cbk.deps:
                print("connecting %s \nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
                dispatcher.connect(self.on_dep_ok, signal=dep.node.S_OK, sender=dep.node, weak=False)

        # Associate to each signal the callback _on_signal
        # (bound method of the node that will be called by `AbinitFlow`).
        # Each node will set its attribute _done_signal to True to tell
        # the flow that this callback should be disabled.

        # Register the callbacks for the Workflows.
        #for work in self:
        #    slot = self._sig_slots[work]
        #    for signal in S_ALL:
        #        done_signal = getattr(work, "_done_ " + signal, False)
        #        if not done_sig:
        #            cbk_name = "_on_" + str(signal)
        #            cbk = getattr(work, cbk_name, None)
        #            if cbk is None: continue
        #            slot[work][signal].append(cbk)
        #            print("connecting %s\nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
        #            dispatcher.connect(self.on_dep_ok, signal=signal, sender=dep.node, weak=False)

        # Register the callbacks for the Tasks.

        #self.show_receivers()

    def show_receivers(self, sender=dispatcher.Any, signal=dispatcher.Any):
        print("*** live receivers ***")
        for rec in dispatcher.liveReceivers(dispatcher.getReceivers(sender, signal)):
            print("receiver -->", rec)
        print("*** end live receivers ***")
Example #10
class AbiFireTask(FireTaskBase):

    # List of `AbinitEvent` subclasses that are tested in the check_status method.
    # Subclasses should provide their own list if they need to check the converge status.
    CRITICAL_EVENTS = []

    S_INIT = Status.from_string("Initialized")
    S_LOCKED = Status.from_string("Locked")
    S_READY = Status.from_string("Ready")
    S_SUB = Status.from_string("Submitted")
    S_RUN = Status.from_string("Running")
    S_DONE = Status.from_string("Done")
    S_ABICRITICAL = Status.from_string("AbiCritical")
    S_QCRITICAL = Status.from_string("QCritical")
    S_UNCONVERGED = Status.from_string("Unconverged")
    S_ERROR = Status.from_string("Error")
    S_OK = Status.from_string("Completed")

    ALL_STATUS = [
        S_INIT,
        S_LOCKED,
        S_READY,
        S_SUB,
        S_RUN,
        S_DONE,
        S_ABICRITICAL,
        S_QCRITICAL,
        S_UNCONVERGED,
        S_ERROR,
        S_OK,
    ]

    def __init__(self, abiinput):
        """
        Basic __init__: subclasses are supposed to define the same input parameters, add their own, and call super for
        the basic ones. The input parameters should be stored as attributes of the instance for serialization and
        for inspection.
        """
        self.abiinput = abiinput

    @serialize_fw
    def to_dict(self):
        d = {}
        for arg in inspect.getargspec(self.__init__).args:
            if arg != "self":
                val = self.__getattribute__(arg)
                if hasattr(val, "as_dict"):
                    val = val.as_dict()
                elif isinstance(val, (tuple, list)):
                    val = [
                        v.as_dict() if hasattr(v, "as_dict") else v
                        for v in val
                    ]
                d[arg] = val

        return d

    @classmethod
    def from_dict(cls, d):
        dec = MontyDecoder()
        kwargs = {
            k: dec.process_decoded(v)
            for k, v in d.items() if k in inspect.getargspec(cls.__init__).args
        }
        return cls(**kwargs)

    #from Task
    def set_workdir(self, workdir):
        """Set the working directory."""

        self.workdir = os.path.abspath(workdir)

        # Files required for the execution.
        self.input_file = File(os.path.join(self.workdir, "run.abi"))
        self.output_file = File(os.path.join(self.workdir, "run.abo"))
        self.files_file = File(os.path.join(self.workdir, "run.files"))
        self.log_file = File(os.path.join(self.workdir, "run.log"))
        self.stderr_file = File(os.path.join(self.workdir, "run.err"))

        # Directories with input|output|temporary data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

    # from Task
    def build(self):
        """
        Creates the working directory and the input files of the :class:`Task`.
        It does not overwrite files if they already exist.
        """
        # Create dirs for input, output and tmp data.
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        # Write files file and input file.
        if not self.files_file.exists:
            self.files_file.write(self.filesfile_string)

        self.input_file.write(str(self.abiinput))

    #from Task
    # Prefixes for Abinit (input, output, temporary) files.
    Prefix = collections.namedtuple("Prefix", "idata odata tdata")
    pj = os.path.join

    prefix = Prefix(pj("indata", "in"), pj("outdata", "out"),
                    pj("tmpdata", "tmp"))
    del Prefix, pj

    #from AbintTask
    @property
    def filesfile_string(self):
        """String with the list of files and prefixes needed to execute ABINIT."""
        lines = []
        app = lines.append
        pj = os.path.join

        app(self.input_file.path)  # Path to the input file
        app(self.output_file.path)  # Path to the output file
        app(pj(self.workdir, self.prefix.idata))  # Prefix for input data
        app(pj(self.workdir, self.prefix.odata))  # Prefix for output data
        app(pj(self.workdir, self.prefix.tdata))  # Prefix for temporary data

        # Paths to the pseudopotential files.
        # Note that here the pseudos **must** be sorted according to znucl.
        # Here we reorder the pseudos if the order is wrong.
        ord_pseudos = []
        znucl = self.abiinput.structure.to_abivars()["znucl"]

        for z in znucl:
            for p in self.abiinput.pseudos:
                if p.Z == z:
                    ord_pseudos.append(p)
                    break
            else:
                raise ValueError(
                    "Cannot find pseudo with znucl %s in pseudos:\n%s" %
                    (z, self.abiinput.pseudos))

        for pseudo in ord_pseudos:
            app(pseudo.path)

        return "\n".join(lines)

    def run_abinit(self, fw_spec):
        """Run abinit through mpirun, redirecting stdin/stdout/stderr to the task files."""
        with open(self.files_file.path, 'r') as stdin, open(self.log_file.path, 'w') as stdout, \
                open(self.stderr_file.path, 'w') as stderr:

            p = subprocess.Popen(['mpirun', 'abinit'],
                                 stdin=stdin,
                                 stdout=stdout,
                                 stderr=stderr)

            # Wait for completion while the file handles are still open.
            p.communicate()

        self.returncode = p.returncode

    def get_event_report(self):
        """
        Analyzes the main output file for possible Errors or Warnings.

        Returns:
            :class:`EventReport` instance or None if the main output file does not exist.
        """

        if not self.log_file.exists:
            return None

        parser = events.EventsParser()
        try:
            report = parser.parse(self.log_file.path)
            return report

        except parser.Error as exc:
            # Return a report with an error entry with info on the exception.
            logger.critical("%s: Exception while parsing ABINIT events:\n %s" %
                            (self.log_file, str(exc)))
            return parser.report_exception(self.log_file.path, exc)

    def task_analysis(self, fw_spec):

        status, msg = self.check_final_status()

        if self.status != self.S_OK:
            raise AbinitRuntimeError(self)

        return FWAction(stored_data=dict(**self.report.as_dict()))

    def run_task(self, fw_spec):
        self.set_workdir(os.path.abspath('.'))
        self.build()
        self.run_abinit(fw_spec)
        return self.task_analysis(fw_spec)

    def set_status(self, status, msg=None):
        self.status = status
        return status, msg

    def check_final_status(self):
        """
        This function checks the status of the task by inspecting the output and the
        error files produced by the application. Based on abipy task checkstatus().
        """
        # 2) see if an error occured at starting the job
        # 3) see if there is output
        # 4) see if abinit reports problems
        # 5) see if err file exists and is empty
        # 9) the only way of landing here is if there is a output file but no err files...

        # 2) Check the returncode of the process (the process of submitting the job) first.
        if self.returncode != 0:
            # The job was not submitted properly
            return self.set_status(self.S_QCRITICAL,
                                   msg="return code %s" % self.returncode)

        # Analyze the stderr file for Fortran runtime errors.
        err_msg = None
        if self.stderr_file.exists:
            err_msg = self.stderr_file.read()

        # Start to check ABINIT status if the output file has been created.
        if self.output_file.exists:
            try:
                self.report = self.get_event_report()
            except Exception as exc:
                msg = "%s exception while parsing event_report:\n%s" % (self,
                                                                        exc)
                logger.critical(msg)
                return self.set_status(self.S_ABICRITICAL, msg=msg)

            if self.report.run_completed:

                # Check if the calculation converged.
                not_ok = self.report.filter_types(self.CRITICAL_EVENTS)
                if not_ok:
                    return self.set_status(self.S_UNCONVERGED)
                else:
                    return self.set_status(self.S_OK)

            # Calculation still running or errors?
            if self.report.errors or self.report.bugs:
                # Abinit reported problems
                if self.report.errors:
                    logger.debug('Found errors in report')
                    for error in self.report.errors:
                        logger.debug(str(error))
                        try:
                            self.abi_errors.append(error)
                        except AttributeError:
                            self.abi_errors = [error]

                # The job is unfixable due to ABINIT errors
                logger.debug(
                    "%s: Found Errors or Bugs in ABINIT main output!" % self)
                msg = "\n".join(
                    map(repr, self.report.errors + self.report.bugs))
                return self.set_status(self.S_ABICRITICAL, msg=msg)

        # 9) if we still haven't returned there is no indication of any error and the job can only still be running
        # but we should actually never land here, or we have delays in the file system ....
        # print('the job still seems to be running maybe it is hanging without producing output... ')

        # Check time of last modification.
        if self.output_file.exists and \
           (time.time() - self.output_file.get_stat().st_mtime > self.manager.policy.frozen_timeout):
            msg = "Task seems to be frozen, last change more than %s [s] ago" % self.manager.policy.frozen_timeout
            return self.set_status(self.S_ERROR, msg)

        return self.set_status(self.S_RUN)

    # from GsTask
    @property
    def gsr_path(self):
        """Absolute path of the GSR file. Empty string if file is not present."""
        # Lazy property to avoid multiple calls to has_abiext.
        try:
            return self._gsr_path
        except AttributeError:
            path = self.outdir.has_abiext("GSR")
            if path: self._gsr_path = path
            return path

    def open_gsr(self):
        """
        Open the GSR file located in self.outdir.
        Returns :class:`GsrFile` object, None if file could not be found or file is not readable.
        """
        gsr_path = self.gsr_path
        if not gsr_path:
            if self.status == self.S_OK:
                logger.critical(
                    "%s reached S_OK but didn't produce a GSR file in %s" %
                    (self, self.outdir))
            return None

        # Open the GSR file.
        from abipy.electrons.gsr import GsrFile
        try:
            return GsrFile(gsr_path)
        except Exception as exc:
            logger.critical("Exception while reading GSR file at %s:\n%s" %
                            (gsr_path, str(exc)))
            return None
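
A hedged sketch of how this FireTask might be queued with FireWorks; `Firework`, `LaunchPad.auto_load` and `add_wf` are standard FireWorks calls whose exact configuration depends on the local setup, while `scf_input` is a placeholder for an AbinitInput-like object exposing the attributes used above (structure, pseudos, __str__):

from fireworks import Firework, LaunchPad

# scf_input: placeholder abinit input object (not defined in this example).
task = AbiFireTask(scf_input)
fw = Firework([task], name="abinit_scf")

# Add the Firework to the local LaunchPad; a rocket launcher (e.g. rlaunch)
# then calls run_task(): set_workdir('.'), build(), run_abinit(), task_analysis().
lpad = LaunchPad.auto_load()
lpad.add_wf(fw)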
Example #11
class AbinitFlow(Node):
    """
    This object is a container of workflows. Its main task is managing the
    possible inter-dependencies among the workflows and the creation of
    dynamic workflows that are generated by callbacks registered by the user.

    .. attributes:

        creation_date:
            String with the creation_date

        pickle_protocol: 
            Protocol for Pickle database (default: -1 i.e. latest protocol)
    """
    VERSION = "0.1"

    PICKLE_FNAME = "__AbinitFlow__.pickle"

    def __init__(self, workdir, manager, pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        super(AbinitFlow, self).__init__()

        self.set_workdir(workdir)

        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        self.pickle_protocol = int(pickle_protocol)

        # TODO
        # Signal slots: a dictionary with the list
        # of callbacks indexed by node_id and SIGNAL_TYPE.
        # When the node changes its status, it broadcast a signal.
        # The flow is listening to all the nodes of the calculation
        # [node_id][SIGNAL] = list_of_signal_handlers
        #self._sig_slots =  slots = {}
        #for work in self:
        #    slots[work] = {s: [] for s in work.S_ALL}

        #for task in self.iflat_tasks():
        #    slots[task] = {s: [] for s in work.S_ALL}

    def set_workdir(self, workdir, chroot=False):
        """
        Set the working directory. Cannot be set more than once unless chroot is True
        """
        if not chroot and hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" %
                             (self.workdir, workdir))

        # Directories with (input|output|temporary) data.
        self.workdir = os.path.abspath(workdir)
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

    @classmethod
    def pickle_load(cls, filepath, disable_signals=False):
        """
        Loads the object from a pickle file and performs initial setup.

        Args:
            filepath:
                Filename or directory name. If filepath is a directory, we
                scan the directory tree starting from filepath and we 
                read the first pickle database.
            disable_signals:
                If True, the nodes of the flow are not connected by signals.
                This option is usually used when we want to read a flow 
                in read-only mode and we want to avoid any possible side effect.
        """
        if os.path.isdir(filepath):
            # Walk through each directory inside path and find the pickle database.
            for dirpath, dirnames, filenames in os.walk(filepath):
                fnames = [f for f in filenames if f == cls.PICKLE_FNAME]
                if fnames:
                    assert len(fnames) == 1
                    filepath = os.path.join(dirpath, fnames[0])
                    break
            else:
                err_msg = "Cannot find %s inside directory %s" % (
                    cls.PICKLE_FNAME, filepath)
                raise ValueError(err_msg)

        #with FileLock(filepath) as lock:
        with open(filepath, "rb") as fh:
            flow = pickle.load(fh)

        # Check if versions match.
        if flow.VERSION != cls.VERSION:
            msg = ("File flow version %s != latest version %s\n."
                   "Regerate the flow to solve the problem " %
                   (flow.VERSION, cls.VERSION))
            warnings.warn(msg)

        if not disable_signals:
            flow.connect_signals()

        # Recompute the status of each task since tasks that
        # have been submitted previously might be completed.
        flow.check_status()
        return flow

    def __len__(self):
        return len(self.works)

    def __iter__(self):
        return self.works.__iter__()

    def __getitem__(self, slice):
        return self.works[slice]

    @property
    def works(self):
        """List of `Workflow` objects contained in self.."""
        return self._works

    @property
    def all_ok(self):
        """True if all the tasks in workflows have reached S_OK."""
        return all(work.all_ok for work in self)

    @property
    def all_tasks(self):
        return self.iflat_tasks()

    @property
    def num_tasks(self):
        """Total number of tasks"""
        return len(list(self.iflat_tasks()))

    @property
    def errored_tasks(self):
        """List of errored tasks."""
        return list(self.iflat_tasks(status=self.S_ERROR))

    @property
    def num_errored_tasks(self):
        """The number of tasks whose status is `S_ERROR`."""
        return len(self.errored_tasks)

    @property
    def unconverged_tasks(self):
        """List of unconverged tasks."""
        return list(self.iflat_tasks(status=self.S_UNCONVERGED))

    @property
    def num_unconverged_tasks(self):
        """The number of tasks whose status is `S_UNCONVERGED`."""
        return len(self.unconverged_tasks)

    @property
    def status_counter(self):
        """
        Returns a `Counter` object that counts the number of tasks with 
        given status (use the string representation of the status as key).
        """
        # Count the number of tasks with given status in each workflow.
        counter = self[0].status_counter
        for work in self[1:]:
            counter += work.status_counter

        return counter

    @property
    def ncpus_reserved(self):
        """
        Returns the number of CPUs reserved at this moment.
        A CPU is reserved if the task is not running but
        we have already submitted it to the queue manager.
        """
        return sum(work.ncpus_reverved for work in self)

    @property
    def ncpus_allocated(self):
        """
        Returns the number of CPUs allocated in this moment.
        A CPU is allocated if it's running a task or if we have
        submitted a task to the queue manager but the job is still pending.
        """
        return sum(work.ncpus_allocated for work in self)

    @property
    def ncpus_inuse(self):
        """
        Returns the number of CPUs used in this moment.
        A CPU is used if there's a job that is running on it.
        """
        return sum(work.ncpus_inuse for work in self)

    @property
    def has_chrooted(self):
        """
        Returns a string that evaluates to True if we have changed
        the workdir for visualization purposes, e.g. we are using sshfs
        to mount the remote directory where the `Flow` is located.
        The string gives the previous workdir of the flow.
        """
        try:
            return self._chrooted_from

        except AttributeError:
            return ""

    def chroot(self, new_workdir):
        """
        Change the workdir of the `Flow`. Mainly used for
        allowing the user to open the GUI on the local host
        and access the flow from remote via sshfs.

        .. note:
            Calling this method will make the flow go in read-only mode.
        """
        self._chrooted_from = self.workdir
        self.set_workdir(new_workdir, chroot=True)

        for i, work in enumerate(self):
            new_wdir = os.path.join(self.workdir, "work_" + str(i))
            work.chroot(new_wdir)
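
    # Usage sketch (not part of the original example): remap a remote flow to
    # a local sshfs mount point ("/mnt/remote_flow" is a hypothetical path).
    # After chroot the flow is read-only and pickle_dump is disabled.
    #
    #   flow = AbinitFlow.pickle_load("/mnt/remote_flow", disable_signals=True)
    #   flow.chroot("/mnt/remote_flow")
    #   flow.show_status()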

    def groupby_status(self):
        """
        Returns an ordered dictionary mapping the task status to
        the list of named tuples (task, work_index, task_index).
        """
        Entry = collections.namedtuple("Entry", "task wi ti")
        d = collections.defaultdict(list)

        for task, wi, ti in self.iflat_tasks_wti():
            d[task.status].append(Entry(task, wi, ti))

        # Sort keys according to their status.
        return collections.OrderedDict([(k, d[k])
                                        for k in sorted(list(d.keys()))])

    def iflat_tasks_wti(self, status=None, op="=="):
        """
        Generator to iterate over all the tasks of the `Flow`.
        Yields

            (task, work_index, task_index)

        If status is not None, only the tasks whose status satisfies
        the condition (task.status op status) are selected.
        status can be either one of the flags defined in the `Task` class
        (e.g. Task.S_OK) or a string, e.g. "S_OK".
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=True)

    def iflat_tasks(self, status=None, op="=="):
        """
        Generator to iterate over all the tasks of the `Flow`.

        If status is not None, only the tasks whose status satisfies
        the condition (task.status op status) are selected.
        status can be either one of the flags defined in the `Task` class
        (e.g. Task.S_OK) or a string, e.g. "S_OK".
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=False)
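
    # Usage sketch (not part of the original example): loop over the tasks that
    # have not reached S_OK yet, using the string form of the status flag.
    #
    #   for task, wi, ti in flow.iflat_tasks_wti(status="S_OK", op="!="):
    #       print("work %d, task %d:" % (wi, ti), task.status)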

    def _iflat_tasks_wti(self, status=None, op="==", with_wti=True):
        """
        Generator that produces a flat sequence of tasks.
        If status is not None, only the tasks with the specified status are selected.

        Returns:
            (task, work_index, task_index) if with_wti is True else task
        """
        if status is None:
            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if with_wti:
                        yield task, wi, ti
                    else:
                        yield task

        else:
            # Get the operator from the string.
            import operator
            op = {
                "==": operator.eq,
                "!=": operator.ne,
                ">": operator.gt,
                ">=": operator.ge,
                "<": operator.lt,
                "<=": operator.le,
            }[op]

            # Accept Task.S_FLAG or string.
            if is_string(status):
                status = getattr(Task, status)

            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if op(task.status, status):
                        if with_wti:
                            yield task, wi, ti
                        else:
                            yield task

    def check_dependencies(self):
        """Test the dependencies of the nodes for possible deadlocks."""
        deadlocks = []

        for task in self.all_tasks:
            for dep in task.deps:
                if dep.node.depends_on(task):
                    deadlocks.append((task, dep.node))

        if deadlocks:
            lines = [
                "Detect wrong list of dependecies that will lead to a deadlock:"
            ]
            lines.extend(["%s <--> %s" % nodes for nodes in deadlocks])
            raise ValueError("\n".join(lines))

    #def detect_deadlock(self):
    #    eu_tasks = list(self.errored_tasks) + list(self.unconverged_tasks)
    #     if not eu_tasks:
    #        return []

    #    deadlocked = []
    #    for task in self.all_tasks:
    #        if any(task.depends_on(eu_task) for eu_task in eu_tasks):
    #            deadlocked.append(task)

    #    return deadlocked

    def check_status(self):
        """Check the status of the workflows in self."""
        for work in self:
            work.check_status()

    def show_status(self, stream=sys.stdout):
        """
        Report the status of the workflows and the status 
        of the different tasks on the specified stream.
        """
        for i, work in enumerate(self):
            print(80 * "=")
            print("Workflow #%d: %s, Finalized=%s\n" %
                  (i, work, work.finalized))

            table = [[
                "Task", "Status", "Queue_id", "Errors", "Warnings", "Comments",
                "MPI", "OMP", "num_restarts", "Task Class"
            ]]

            for task in work:
                task_name = os.path.basename(task.name)

                # Parse the events in the main output.
                report = task.get_event_report()

                # Use lists (not map iterators) so that the concatenation
                # below also works under Python 3.
                events = 3 * ["N/A"]
                if report is not None:
                    events = list(map(str, [
                        report.num_errors, report.num_warnings,
                        report.num_comments
                    ]))

                cpu_info = list(map(str, [task.mpi_ncpus, task.omp_ncpus]))
                task_info = list(map(str,
                                     [task.num_restarts, task.__class__.__name__]))

                table.append([task_name,
                              str(task.status),
                              str(task.queue_id)] + events + cpu_info +
                             task_info)

            pprint_table(table, out=stream)

    def open_files(self,
                   what="o",
                   wti=None,
                   status=None,
                   op="==",
                   editor=None):
        """
        Open the files of the flow inside an editor (command line interface).

        Args:
            what:
                string with the list of characters selecting the file type
                Possible choices:
                    i ==> input_file,
                    o ==> output_file,
                    f ==> files_file,
                    j ==> job_file,
                    l ==> log_file,
                    e ==> stderr_file,
                    q ==> qerr_file,
            wti:
                tuple with the (work_index, task_index) to select
                or string in the form w_start:w_stop,task_start:task_stop
            status:
                if not None, only the tasks with this status are selected.
            op:
                status operator. Requires status. A task is selected 
                if task.status op status evaluates to true.
            editor:
                Select the editor. None to use the default editor ($EDITOR shell env var)
        """
        #TODO: Add support for wti
        if wti is not None:
            raise NotImplementedError("wti option is not available!")

        def get_files(task, wi, ti):
            """Helper function used to select the files of a task."""
            choices = {
                "i": task.input_file,
                "o": task.output_file,
                "f": task.files_file,
                "j": task.job_file,
                "l": task.log_file,
                "e": task.stderr_file,
                "q": task.qerr_file,
                #"q": task.qout_file,
            }

            selected = []
            for c in what:
                try:
                    selected.append(getattr(choices[c], "path"))
                except KeyError:
                    import warnings
                    warnings.warn("Wrong keywork %s" % c)
            return selected

        # Build list of files to analyze.
        files = []
        for (task, wi, ti) in self.iflat_tasks_wti(status=status, op=op):
            lst = get_files(task, wi, ti)
            if lst: files.extend(lst)

        #print(files)
        return Editor(editor=editor).edit_files(files)
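
    # Usage sketch (not part of the original example): open the log and stderr
    # files of all the errored tasks with the default $EDITOR.
    #
    #   flow.open_files(what="le", status="S_ERROR")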

    def cancel(self):
        """
        Cancel all the tasks that are in the queue.

        Returns:
            Number of jobs cancelled, negative value if error
        """
        if self.has_chrooted:
            # TODO: Use paramiko to kill the job?
            warnings.warn("Cannot cancel the flow via sshfs!")
            return -1

        # If we are running with the scheduler, we must send a SIGKILL signal.
        pid_file = os.path.join(self.workdir, "_PyFlowScheduler.pid")
        if os.path.exists(pid_file):
            with open(pid_file, "r") as fh:
                pid = int(fh.readline())

            retcode = os.system("kill -9 %d" % pid)
            print("Sent SIGKILL to the scheduler, retcode = %s" % retcode)
            try:
                os.remove(pid_file)
            except OSError:
                pass

        num_cancelled = 0
        for task in self.iflat_tasks():
            num_cancelled += task.cancel()

        return num_cancelled

    def build(self, *args, **kwargs):
        """Make directories and files of the `Flow`."""
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        for work in self:
            work.build(*args, **kwargs)

    def build_and_pickle_dump(self):
        """
        Build dirs and file of the `Flow` and save the object in pickle format.

        Returns:
            0 if success
        """
        self.build()
        return self.pickle_dump()

    def pickle_dump(self):
        """
        Save the status of the object in pickle format.

        Returns:
            0 if success
        """
        if self.has_chrooted:
            warnings.warn("Cannot pickle_dump since we have chrooted from %s" %
                          self.has_chrooted)
            return -1

        protocol = self.pickle_protocol
        filepath = os.path.join(self.workdir, self.PICKLE_FNAME)

        with FileLock(filepath) as lock:
            with open(filepath, mode="w" if protocol == 0 else "wb") as fh:
                pickle.dump(self, fh, protocol=protocol)

        # Atomic transaction.
        #filepath_new = filepath + ".new"
        #filepath_save = filepath + ".save"
        #shutil.copyfile(filepath, filepath_save)

        #try:
        #    with open(filepath_new, mode="w" if protocol == 0 else "wb") as fh:
        #        pickle.dump(self, fh, protocol=protocol)

        #    os.rename(filepath_new, filepath)
        #except IOError:
        #    os.rename(filepath_save, filepath)
        #finally:
        #    try
        #        os.remove(filepath_save)
        #    except:
        #        pass
        #    try
        #        os.remove(filepath_new)
        #    except:
        #        pass
        return 0

    def register_task(self, input, deps=None, manager=None, task_class=None):
        """
        Utility function that generates a `Workflow` made of a single task

        Args:
            input:
                Abinit input file, `Strategy` object, or `Task` object.
            deps:
                Dictionary mapping the nodes this node depends on to the file
                extensions they produce (used to build `Dependency` objects).
                An empty dict (or None) implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            The generated `Task`.
        """
        work = Workflow(manager=manager)
        task = work.register(input, deps=deps, task_class=task_class)
        self.register_work(work)

        return task

    def register_work(self, work, deps=None, manager=None, workdir=None):
        """
        Register a new `Workflow` and add it to the internal list, 
        taking into account possible dependencies.

        Args:
            work:
                `Workflow` object.
            deps:
                Dictionary mapping the nodes this node depends on to the file
                extensions they produce (used to build `Dependency` objects).
                An empty dict (or None) implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            workdir:
                The name of the directory used for the `Workflow`.

        Returns:   
            The registered `Workflow`.
        """
        # Directory of the workflow.
        if workdir is None:
            work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
        else:
            work_workdir = os.path.join(self.workdir,
                                        os.path.basename(workdir))

        work.set_workdir(work_workdir)

        if manager is not None:
            work.set_manager(manager)

        self.works.append(work)

        if deps:
            deps = [Dependency(node, exts) for node, exts in deps.items()]
            work.add_deps(deps)

        return work
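
    # Usage sketch (not part of the original example): deps maps the node we
    # depend on to the extension of the file it produces (scf_work and
    # nscf_work are hypothetical Workflow objects).
    #
    #   flow.register_work(scf_work)
    #   flow.register_work(nscf_work, deps={scf_work: "DEN"})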

    def register_cbk(self, cbk, cbk_data, deps, work_class, manager=None):
        """
        Registers a callback function that will generate the `Task` of the `Workflow`.

        Args:
            cbk:
                Callback function.
            cbk_data:
                Additional data passed to the callback function.
            deps:
                Dictionary mapping the nodes the workflow depends on to the file
                extensions they produce (used to build `Dependency` objects).
            work_class:
                `Workflow` class to instantiate.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the `Flow`.
                                                                                                            
        Returns:   
            The `Workflow` that will be finalized by the callback.
        """
        # TODO: pass a workflow factory instead of a class
        # Directory of the workflow.
        work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))

        # Create an empty workflow and register the callback
        work = work_class(workdir=work_workdir, manager=manager)

        self._works.append(work)

        deps = [Dependency(node, exts) for node, exts in deps.items()]
        if not deps:
            raise ValueError("A callback must have deps!")

        work.add_deps(deps)

        # Wrap the callable in a Callback object and save
        # useful info such as the index of the workflow and the callback data.
        cbk = Callback(cbk, work, deps=deps, cbk_data=cbk_data)

        self._callbacks.append(cbk)

        return work

    def allocate(self, manager=None):
        """
        Allocate the `AbinitFlow` i.e. assign the `workdir` and (optionally) 
        the `TaskManager` to the different tasks in the Flow.
        """
        for work in self:
            work.allocate(manager=self.manager)
            work.set_flow(self)

        for task in self.iflat_tasks():
            task.set_flow(self)

        self.check_dependencies()
        return self

    def show_dependencies(self):
        for work in self:
            work.show_intrawork_deps()

    def on_dep_ok(self, signal, sender):
        # TODO
        # Replace this callback with dynamic dispatch
        # on_all_S_OK for workflow
        # on_S_OK for task
        print("on_dep_ok with sender %s, signal %s" % (str(sender), signal))

        for i, cbk in enumerate(self._callbacks):

            if not cbk.handle_sender(sender):
                print("Do not handle")
                continue

            if not cbk.can_execute():
                print("cannot execute")
                continue

            # Execute the callback to generate the workflow.
            print("about to build new workflow")
            #empty_work = self._works[cbk.w_idx]

            # TODO better treatment of ids
            # Make sure the new workflow has the same id as the previous one.
            #new_work_idx = cbk.w_idx
            work = cbk(flow=self)
            work.add_deps(cbk.deps)

            # Disable the callback.
            cbk.disable()

            # Update the database.
            self.pickle_dump()

    #def finalize(self):
    #    """This method is called when the flow is completed."""

    def connect_signals(self):
        """
        Connect the signals within the workflow.
        self is responsible for catching the important signals raised from
        its tasks and raising new signals when some particular condition occurs.
        """
        # Connect the signals inside each Workflow.
        for work in self:
            work.connect_signals()

        # Observe the nodes that must reach S_OK in order to call the callbacks.
        for cbk in self._callbacks:
            for dep in cbk.deps:
                print("connecting %s \nwith sender %s, signal %s" %
                      (str(cbk), dep.node, dep.node.S_OK))
                dispatcher.connect(self.on_dep_ok,
                                   signal=dep.node.S_OK,
                                   sender=dep.node,
                                   weak=False)

        # Associate to each signal the callback _on_signal
        # (bound method of the node that will be called by `AbinitFlow`
        # Each node will set its attribute _done_signal to True to tell
        # the flow that this callback should be disabled.

        # Register the callbacks for the Workflows.
        #for work in self:
        #    slot = self._sig_slots[work]
        #    for signal in S_ALL:
        #        done_signal = getattr(work, "_done_ " + signal, False)
        #        if not done_sig:
        #            cbk_name = "_on_" + str(signal)
        #            cbk = getattr(work, cbk_name, None)
        #            if cbk is None: continue
        #            slot[work][signal].append(cbk)
        #            print("connecting %s\nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
        #            dispatcher.connect(self.on_dep_ok, signal=signal, sender=dep.node, weak=False)

        # Register the callbacks for the Tasks.

        #self.show_receivers()

    def show_receivers(self, sender=dispatcher.Any, signal=dispatcher.Any):
        print("*** live receivers ***")
        for rec in dispatcher.liveReceivers(
                dispatcher.getReceivers(sender, signal)):
            print("receiver -->", rec)
        print("*** end live receivers ***")
Example #12
0
class Workflow(BaseWorkflow):
    """
    A Workflow is a list of (possibly connected) tasks.
    """
    Error = WorkflowError

    def __init__(self, workdir=None, manager=None):
        """
        Args:
            workdir:
                Path to the working directory.
            manager:
                `TaskManager` object.
        """
        super(Workflow, self).__init__()

        self._tasks = []

        if workdir is not None:
            self.set_workdir(workdir)

        if manager is not None:
            self.set_manager(manager)

    def set_manager(self, manager):
        """Set the `TaskManager` to use to launch the Task."""
        self.manager = manager.deepcopy()
        for task in self:
            task.set_manager(manager)

    def set_workdir(self, workdir):
        """Set the working directory. Cannot be set more than once."""

        if hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" % (self.workdir,  workdir))

        self.workdir = os.path.abspath(workdir)
                                                                       
        # Directories with (input|output|temporary) data.
        # The workflow will use these directories to connect 
        # itself to other workflows and/or to produce new data 
        # that will be used by its children.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

    def __len__(self):
        return len(self._tasks)

    def __iter__(self):
        return self._tasks.__iter__()

    def __getitem__(self, slice):
        return self._tasks[slice]

    def chunks(self, chunk_size):
        """Yield successive chunks of tasks of lenght chunk_size."""
        for tasks in chunks(self, chunk_size):
            yield tasks

    def ipath_from_ext(self, ext):
        """
        Returns the path of the input file with extension ext.
        Use it when the file does not exist yet.
        """
        return self.indir.path_in("in_" + ext)

    def opath_from_ext(self, ext):
        """
        Returns the path of the output file with extension ext.
        Use it when the file does not exist yet.
        """
        return self.outdir.path_in("out_" + ext)

    @property
    def processes(self):
        return [task.process for task in self]

    @property
    def all_done(self):
        """True if all the `Task` in the `Workflow` are done."""
        return all(task.status >= task.S_DONE for task in self)

    @property
    def isnc(self):
        """True if norm-conserving calculation."""
        return all(task.isnc for task in self)

    @property
    def ispaw(self):
        """True if PAW calculation."""
        return all(task.ispaw for task in self)

    def status_counter(self):
        """
        Returns a `Counter` object that counts the number of tasks with
        given status (use the string representation of the status as key).
        """
        counter = collections.Counter() 

        for task in self:
            counter[str(task.status)] += 1

        return counter

    def allocate(self):

        for i, task in enumerate(self):
            if not hasattr(task, "manager"):
                task.set_manager(self.manager)

            task_workdir = os.path.join(self.workdir, "task_" + str(i))

            if not hasattr(task, "workdir"):
                task.set_workdir(task_workdir)
            else:
                if task.workdir != task_workdir:
                    raise ValueError("task.workdir != task_workdir: %s, %s" % (task.workdir, task_workdir))

    def register(self, obj, deps=None, manager=None, task_class=None):
        """
        Registers a new `Task` and add it to the internal list, taking into account possible dependencies.

        Args:
            obj:
                `Strategy` object or `AbinitInput` instance.
                if Strategy object, we create a new `AbinitTask` from the input strategy and add it to the list.
            deps:
                Dictionary specifying the dependency of this node.
                None means that this obj has no dependency.
            manager:
                The `TaskManager` responsible for the submission of the task. If manager is None, we use 
                the `TaskManager` specified during the creation of the `Workflow`.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            `Task` object
        """
        task_workdir = None
        if hasattr(self, "workdir"):
            task_workdir = os.path.join(self.workdir, "task_" + str(len(self)))

        if isinstance(obj, Task):
            task = obj

        else:
            # Set the class
            if task_class is None:
                task_class = AbinitTask

            if isinstance(obj, Strategy):
                # Create the new task (note the factory so that we create subclasses easily).
                task = task_class(obj, task_workdir, manager)

            else:
                task = task_class.from_input(obj, task_workdir, manager)

        self._tasks.append(task)

        # Handle possible dependencies.
        if deps is not None:
            deps = [Dependency(node, exts) for (node, exts) in deps.items()]
            task.add_deps(deps)

        return task
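
    # Usage sketch (not part of the original example): register two tasks and
    # let the second one depend on the density file produced by the first
    # (scf_input and nscf_input are hypothetical input objects).
    #
    #   scf_task = work.register(scf_input)
    #   work.register(nscf_input, deps={scf_task: "DEN"})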

    def path_in_workdir(self, filename):
        """Create the absolute path of filename in the working directory."""
        return os.path.join(self.workdir, filename)

    def setup(self, *args, **kwargs):
        """
        Method called before running the calculations.
        The default implementation is empty.
        """

    def build(self, *args, **kwargs):
        """Creates the top level directory."""
        # Create the directories of the workflow.
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        # Build dirs and files of each task.
        for task in self:
            task.build(*args, **kwargs)

        # Connect signals within the workflow.
        self.connect_signals()

    @property
    def status(self):
        """
        Returns the status of the workflow i.e. the minimum of the status of the tasks.
        """
        return self.get_all_status(only_min=True)

    #def set_status(self, status):

    def get_all_status(self, only_min=False):
        """
        Returns a list with the status of the tasks in self.

        Args:
            only_min:
                If True, the minimum of the status is returned.
        """
        if len(self) == 0:
            # The workflow will be created in the future.
            if only_min:
                return self.S_INIT
            else:
                return [self.S_INIT]

        self.check_status()

        status_list = [task.status for task in self]
        #print("status_list", status_list)

        if only_min:
            return min(status_list)
        else:
            return status_list

    def check_status(self):
        """Check the status of the tasks."""
        # Recompute the status of the tasks
        for task in self:
            task.check_status()

        # Take into account possible dependencies.
        for task in self:
            if task.status <= task.S_SUB and all(status == task.S_OK for status in task.deps_status): 
                task.set_status(task.S_READY)

    def rmtree(self, exclude_wildcard=""):
        """
        Remove all files and directories in the working directory

        Args:
            exclude_wildcard:
                Optional string with regular expressions separated by |.
                Files matching one of the regular expressions will be preserved.
                example: exclude_wildcard="*.nc|*.txt" preserves all the files
                whose extension is in ["nc", "txt"].

        """
        if not exclude_wildcard:
            shutil.rmtree(self.workdir)

        else:
            w = WildCard(exclude_wildcard)

            for dirpath, dirnames, filenames in os.walk(self.workdir):
                for fname in filenames:
                    path = os.path.join(dirpath, fname)
                    if not w.match(fname):
                        os.remove(path)
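
    # Usage sketch (not part of the original example): remove everything in the
    # working directory except netCDF and text files.
    #
    #   work.rmtree(exclude_wildcard="*.nc|*.txt")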

    def rm_indatadir(self):
        """Remove all the indata directories."""
        for task in self:
            task.rm_indatadir()

    def rm_outdatadir(self):
        """Remove all the indata directories."""
        for task in self:
            task.rm_outatadir()

    def rm_tmpdatadir(self):
        """Remove all the tmpdata directories."""
        for task in self:
            task.rm_tmpdatadir()

    def move(self, dest, isabspath=False):
        """
        Recursively move self.workdir to another location. This is similar to the Unix "mv" command.
        The destination path must not already exist. If the destination already exists
        but is not a directory, it may be overwritten depending on os.rename() semantics.

        By default, dest is located in the parent directory of self.workdir; use isabspath=True
        to specify an absolute path.
        """
        if not isabspath:
            dest = os.path.join(os.path.dirname(self.workdir), dest)

        shutil.move(self.workdir, dest)

    def submit_tasks(self, wait=False):
        """
        Submits the tasks in self and waits for their completion if wait is True.
        TODO: change name.
        """
        for task in self:
            task.start()

        if wait: 
            for task in self: task.wait()

    def start(self, *args, **kwargs):
        """
        Start the work. Calls build and _setup first, then submits the tasks.
        Non-blocking call unless wait is set to True.
        """
        wait = kwargs.pop("wait", False)

        # Build dirs and files.
        self.build(*args, **kwargs)

        # Initial setup
        self._setup(*args, **kwargs)

        # Submit tasks (does not block)
        self.submit_tasks(wait=wait)

    def read_etotal(self):
        """
        Reads the total energy from the GSR file produced by the task.

        Returns a list with the total energies in Hartree.
        An entry is set to np.inf if an exception is raised while reading the GSR file.
        """
        if not self.all_done:
            raise self.Error("Some task is still in running/submitted state")

        etotal = []
        for task in self:
            # Open the GSR file and read etotal (Hartree)
            gsr_path = task.outdir.has_abiext("GSR")
            etot = np.inf
            if gsr_path:
                with ETSF_Reader(gsr_path) as r:
                    etot = r.read_value("etotal")
                
            etotal.append(etot)

        return etotal

    def json_dump(self, filename):
        json_pretty_dump(self.to_dict, filename)
                                                  
    @classmethod
    def json_load(cls, filename):
        return cls.from_dict(json_load(filename))

    def parse_timers(self):
        """
        Parse the TIMER section reported in the ABINIT output files.

        Returns:
            `AbinitTimerParser` object
        """
        filenames = list(filter(os.path.exists, [task.output_file.path for task in self]))
                                                                           
        parser = AbinitTimerParser()
        parser.parse(filenames)
                                                                           
        return parser
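
A minimal usage sketch for the Workflow class above (not part of the original example; manager and gs_input are assumed to be a valid `TaskManager` and an Abinit input object):

    work = Workflow(workdir="my_work", manager=manager)
    work.register(gs_input)
    work.start(wait=True)
    print(work.read_etotal())
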
Example #13
0
class AbinitFlow(Node):
    #class AbinitFlow(collections.Iterable):
    """
    This object is a container of workflows. Its main task is managing the
    possible inter-dependencies among the workflows and the creation of
    dynamic workflows that are generated by callbacks registered by the user.

    .. attributes:

        creation_date:
            String with the creation_date

        pickle_protocol: 
            Protocol for Pickle database (default: -1 i.e. latest protocol)
    """
    VERSION = "0.1"

    PICKLE_FNAME = "__AbinitFlow__.pickle"

    def __init__(self,
                 workdir,
                 manager,
                 auto_restart=False,
                 pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            auto_restart:
                True if unconverged calculations should be restarted automatically.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        super(AbinitFlow, self).__init__()

        self.workdir = os.path.abspath(workdir)
        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()
        self.auto_restart = auto_restart

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        # Directories with (input|output|temporary) data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

        self.pickle_protocol = int(pickle_protocol)

        # Signal slots: a dictionary with the list
        # of callbacks indexed by node_id and SIGNAL_TYPE.
        # When the node changes its status, it broadcast a signal.
        # The flow is listening to all the nodes of the calculation
        # [node_id][SIGNAL] = list_of_signal_handlers
        #self._sig_slots =  slots = {}
        #for work in self:
        #    slots[work] = {s: [] for s in work.S_ALL}

        #for task in self.iflat_tasks():
        #    slots[task] = {s: [] for s in work.S_ALL}

    def __len__(self):
        return len(self.works)

    def __iter__(self):
        return self.works.__iter__()

    def __getitem__(self, slice):
        return self.works[slice]

    @property
    def works(self):
        """List of `Workflow` objects contained in self.."""
        return self._works

    @property
    def all_ok(self):
        """True if all the tasks in workflows have reached S_OK."""
        return all(work.all_ok for work in self)

    #@property
    #def completed(self):
    #    """True if all the tasks of the flow have reached S_OK."""
    #    return all(task.status == task.S_OK for task in self.iflat_tasks())

    def iflat_tasks_wti(self, status=None, op="="):
        """
        Returns:
            (task, work_index, task_index)
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=True)

    def iflat_tasks(self, status=None, op="="):
        """
        Returns:
            task
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=False)

    def _iflat_tasks_wti(self, status=None, op="=", with_wti=True):
        """
        Generator that produces a flat sequence of tasks.
        If status is not None, only the tasks with the specified status are selected.

        Returns:
            (task, work_index, task_index) if with_wti is True else task
        """
        if status is None:
            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if with_wti:
                        yield task, wi, ti
                    else:
                        yield task

        else:
            # Get the operator from the string.
            import operator
            op = {
                "=": operator.eq,
                "!=": operator.ne,
                ">": operator.gt,
                ">=": operator.ge,
                "<": operator.lt,
                "<=": operator.le,
            }[op]

            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if op(task.status, status):
                        if with_wti:
                            yield task, wi, ti
                        else:
                            yield task

    @property
    def ncpus_reserved(self):
        """
        Returns the number of CPUs reserved at this moment.
        A CPU is reserved if the task is still not running but
        we have already submitted it to the queue manager.
        """
        return sum(work.ncpus_reverved for work in self)

    @property
    def ncpus_allocated(self):
        """
        Returns the number of CPUs allocated in this moment.
        A CPU is allocated if it's running a task or if we have
        submitted a task to the queue manager but the job is still pending.
        """
        return sum(work.ncpus_allocated for work in self)

    @property
    def ncpus_inuse(self):
        """
        Returns the number of CPUs used in this moment.
        A CPU is used if there's a job that is running on it.
        """
        return sum(work.ncpus_inuse for work in self)

    def check_status(self):
        """Check the status of the workflows in self."""
        for work in self:
            work.check_status()

        # Test whether some task should be restarted.
        if self.auto_restart:
            num_restarts = 0
            for task in self.iflat_tasks(status=Task.S_UNCONVERGED):
                msg = "AbinitFlow will try restart task %s" % task
                print(msg)
                logger.info(msg)
                retcode = task.restart_if_needed()
                if retcode == 0:
                    num_restarts += 1

            if num_restarts:
                print("num_restarts done successfully: ", num_restarts)
                self.pickle_dump()
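
    # Usage sketch (not part of the original example): a flow built with
    # auto_restart=True will try to restart its unconverged tasks each time
    # check_status is called (manager is a hypothetical TaskManager).
    #
    #   flow = AbinitFlow(workdir="my_flow", manager=manager, auto_restart=True)
    #   flow.check_status()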

    def show_status(self, stream=sys.stdout):
        """
        Report the status of the workflows and the status 
        of the different tasks on the specified stream.
        """
        for i, work in enumerate(self):
            print(80 * "=")
            print("Workflow #%d: %s, Finalized=%s\n" %
                  (i, work, work.finalized))

            table = [[
                "Task", "Status", "Queue_id", "Errors", "Warnings", "Comments",
                "MPI", "OMP", "num_restarts", "max_restarts", "Task Class"
            ]]

            for task in work:
                task_name = os.path.basename(task.name)

                # Parse the events in the main output.
                report = task.get_event_report()

                # Use lists (not map iterators) so that the concatenation
                # below also works under Python 3.
                events = 3 * ["N/A"]
                if report is not None:
                    events = list(map(str, [
                        report.num_errors, report.num_warnings,
                        report.num_comments
                    ]))

                cpu_info = list(map(str, [task.mpi_ncpus, task.omp_ncpus]))
                task_info = list(map(str, [
                    task.num_restarts, task.max_num_restarts,
                    task.__class__.__name__
                ]))

                table.append([task_name,
                              str(task.status),
                              str(task.queue_id)] + events + cpu_info +
                             task_info)

            pprint_table(table, out=stream)

    def build(self, *args, **kwargs):
        """Make directories and files of the `Flow`."""
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        for work in self:
            work.build(*args, **kwargs)

    def build_and_pickle_dump(self):
        """
        Build dirs and file of the `Flow` and save the object in pickle format.

        Returns:
            0 if success
        """
        self.build()
        return self.pickle_dump()

    def pickle_dump(self):
        """
        Save the status of the object in pickle format.

        Returns:
            0 if success
        """
        protocol = self.pickle_protocol
        filepath = os.path.join(self.workdir, self.PICKLE_FNAME)

        with FileLock(filepath) as lock:
            with open(filepath, mode="w" if protocol == 0 else "wb") as fh:
                pickle.dump(self, fh, protocol=protocol)

        # Atomic transaction.
        #filepath_new = filepath + ".new"
        #filepath_save = filepath + ".save"
        #shutil.copyfile(filepath, filepath_save)

        #try:
        #    with open(filepath_new, mode="w" if protocol == 0 else "wb") as fh:
        #        pickle.dump(self, fh, protocol=protocol)

        #    os.rename(filepath_new, filepath)
        #except IOError:
        #    os.rename(filepath_save, filepath)
        #finally:
        #    try
        #        os.remove(filepath_save)
        #    except:
        #        pass
        #    try
        #        os.remove(filepath_new)
        #    except:
        #        pass
        return 0

    @classmethod
    def pickle_load(cls, filepath, disable_signals=False):
        """
        Loads the object from a pickle file and performs initial setup.

        Args:
            filepath:
                Filename or directory name. If filepath is a directory, we
                scan the directory tree starting from filepath and
                read the first pickle database found.
            disable_signals:
                If True, the nodes of the flow are not connected by signals.
                This option is usually used when we want to read a flow 
                in read-only mode and we want to avoid any possible side effect.
        """
        if os.path.isdir(filepath):
            # Walk through each directory inside path and find the pickle database.
            for dirpath, dirnames, filenames in os.walk(filepath):
                fnames = [f for f in filenames if f == cls.PICKLE_FNAME]
                if fnames:
                    assert len(fnames) == 1
                    filepath = os.path.join(dirpath, fnames[0])
                    break
            else:
                err_msg = "Cannot find %s inside directory %s" % (
                    cls.PICKLE_FNAME, filepath)
                raise ValueError(err_msg)

        with FileLock(filepath) as lock:
            with open(filepath, "rb") as fh:
                flow = pickle.load(fh)

        if not disable_signals:
            flow.connect_signals()

        # Recompute the status of each task since tasks that
        # have been submitted previously might be completed.
        flow.check_status()
        return flow

    def register_task(self, input, deps=None, manager=None, task_class=None):
        """
        Utility function that generates a `Workflow` made of a single task

        Args:
            input:
                Abinit input file, `Strategy` object, or `Task` object.
            deps:
                Dictionary mapping the nodes this node depends on to the file
                extensions they produce (used to build `Dependency` objects).
                An empty dict (or None) implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            The generated `Task`.
        """
        work = Workflow(manager=manager)
        task = work.register(input, deps=deps, task_class=task_class)
        self.register_work(work)

        return task

    def register_work(self, work, deps=None, manager=None, workdir=None):
        """
        Register a new `Workflow` and add it to the internal list, 
        taking into account possible dependencies.

        Args:
            work:
                `Workflow` object.
            deps:
                Dictionary mapping the nodes this node depends on to the file
                extensions they produce (used to build `Dependency` objects).
                An empty dict (or None) implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            workdir:
                The name of the directory used for the `Workflow`.

        Returns:   
            The registered `Workflow`.
        """
        # Directory of the workflow.
        if workdir is None:
            work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
        else:
            work_workdir = os.path.join(self.workdir,
                                        os.path.basename(workdir))

        work.set_workdir(work_workdir)

        if manager is not None:
            work.set_manager(manager)

        self.works.append(work)

        if deps:
            deps = [Dependency(node, exts) for node, exts in deps.items()]
            work.add_deps(deps)

        return work

    def register_cbk(self, cbk, cbk_data, deps, work_class, manager=None):
        """
        Registers a callback function that will generate the `Task` of the `Workflow`.

        Args:
            cbk:
                Callback function.
            cbk_data:
                Additional data passed to the callback function.
            deps:
                Dictionary mapping the nodes the workflow depends on to the file
                extensions they produce (used to build `Dependency` objects).
            work_class:
                `Workflow` class to instantiate.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the `Flow`.
                                                                                                            
        Returns:   
            The `Workflow` that will be finalized by the callback.
        """
        # TODO: pass a workflow factory instead of a class
        # Directory of the workflow.
        work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))

        # Create an empty workflow and register the callback
        work = work_class(workdir=work_workdir, manager=manager)

        self._works.append(work)

        deps = [Dependency(node, exts) for node, exts in deps.items()]
        if not deps:
            raise ValueError("A callback must have deps!")

        work.add_deps(deps)

        # Wrap the callable in a Callback object and save
        # useful info such as the index of the workflow and the callback data.
        cbk = Callback(cbk, work, deps=deps, cbk_data=cbk_data)

        self._callbacks.append(cbk)

        return work

    def allocate(self, manager=None):
        """
        Allocate the `AbinitFlow` i.e. assign the `workdir` and (optionally) 
        the `TaskManager` to the different tasks in the Flow.
        """
        for work in self:
            work.allocate(manager=self.manager)
            work.set_flow(self)

        for task in self.iflat_tasks():
            task.set_flow(self)

        return self

    def show_dependencies(self):
        for work in self:
            work.show_intrawork_deps()

    def on_dep_ok(self, signal, sender):
        # TODO
        # Replace this callback with dynamic dispatch
        # on_all_S_OK for workflow
        # on_S_OK for task
        print("on_dep_ok with sender %s, signal %s" % (str(sender), signal))

        for i, cbk in enumerate(self._callbacks):

            if not cbk.handle_sender(sender):
                print("Do not handle")
                continue

            if not cbk.can_execute():
                print("cannot execute")
                continue

            # Execute the callback to generate the workflow.
            print("about to build new workflow")
            #empty_work = self._works[cbk.w_idx]

            # TODO better treatment of ids
            # Make sure the new workflow has the same id as the previous one.
            #new_work_idx = cbk.w_idx
            work = cbk(flow=self)
            work.add_deps(cbk.deps)

            # Disable the callback.
            cbk.disable()

            # Update the database.
            self.pickle_dump()

    #def finalize(self):
    #    """This method is called when the flow is completed."""

    def connect_signals(self):
        """
        Connect the signals within the workflow.
        self is responsible for catching the important signals raised from
        its tasks and raising new signals when some particular condition occurs.
        """
        # Connect the signals inside each Workflow.
        for work in self:
            work.connect_signals()

        # Observe the nodes that must reach S_OK in order to call the callbacks.
        for cbk in self._callbacks:
            for dep in cbk.deps:
                print("connecting %s \nwith sender %s, signal %s" %
                      (str(cbk), dep.node, dep.node.S_OK))
                dispatcher.connect(self.on_dep_ok,
                                   signal=dep.node.S_OK,
                                   sender=dep.node,
                                   weak=False)

        # Associate to each signal the callback _on_signal
        # (bound method of the node that will be called by `AbinitFlow`
        # Each node will set its attribute _done_signal to True to tell
        # the flow that this callback should be disabled.

        # Register the callbacks for the Workflows.
        #for work in self:
        #    slot = self._sig_slots[work]
        #    for signal in S_ALL:
        #        done_signal = getattr(work, "_done_ " + signal, False)
        #        if not done_sig:
        #            cbk_name = "_on_" + str(signal)
        #            cbk = getattr(work, cbk_name, None)
        #            if cbk is None: continue
        #            slot[work][signal].append(cbk)
        #            print("connecting %s\nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
        #            dispatcher.connect(self.on_dep_ok, signal=signal, sender=dep.node, weak=False)

        # Register the callbacks for the Tasks.

        #self.show_receivers()

    def show_receivers(self, sender=dispatcher.Any, signal=dispatcher.Any):
        print("*** live receivers ***")
        for rec in dispatcher.liveReceivers(
                dispatcher.getReceivers(sender, signal)):
            print("receiver -->", rec)
        print("*** end live receivers ***")
Example #14
0
class AbinitFlow(Node):
    """
    This object is a container of workflows. Its main task is managing the
    possible inter-dependencies among the workflows and the creation of
    dynamic workflows that are generated by callbacks registered by the user.

    .. attributes:

        creation_date:
            String with the creation_date

        pickle_protocol: 
            Protocol for Pickle database (default: -1 i.e. latest protocol)
    """
    VERSION = "0.1"

    PICKLE_FNAME = "__AbinitFlow__.pickle"

    def __init__(self, workdir, manager, pickle_protocol=-1):
        """
        Args:
            workdir:
                String specifying the directory where the workflows will be produced.
            manager:
                `TaskManager` object responsible for the submission of the jobs.
            pickle_protocol:
                Pickle protocol version used for saving the status of the object.
                -1 denotes the latest version supported by the python interpreter.
        """
        super(AbinitFlow, self).__init__()

        self.set_workdir(workdir)

        self.creation_date = time.asctime()

        self.manager = manager.deepcopy()

        # List of workflows.
        self._works = []

        # List of callbacks that must be executed when the dependencies reach S_OK
        self._callbacks = []

        self.pickle_protocol = int(pickle_protocol)

        # TODO
        # Signal slots: a dictionary with the list 
        # of callbacks indexed by node_id and SIGNAL_TYPE.
        # When the node changes its status, it broadcast a signal.
        # The flow is listening to all the nodes of the calculation
        # [node_id][SIGNAL] = list_of_signal_handlers
        #self._sig_slots =  slots = {}
        #for work in self:
        #    slots[work] = {s: [] for s in work.S_ALL}

        #for task in self.iflat_tasks():
        #    slots[task] = {s: [] for s in work.S_ALL}

    def set_workdir(self, workdir, chroot=False):
        """
        Set the working directory. Cannot be set more than once unless chroot is True
        """
        if not chroot and hasattr(self, "workdir") and self.workdir != workdir:
            raise ValueError("self.workdir != workdir: %s, %s" % (self.workdir,  workdir))

        # Directories with (input|output|temporary) data.
        self.workdir = os.path.abspath(workdir)
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

    @classmethod
    def pickle_load(cls, filepath, disable_signals=False):
        """
        Loads the object from a pickle file and performs initial setup.

        Args:
            filepath:
                Filename or directory name. If filepath is a directory, we
                scan the directory tree starting from filepath and
                read the first pickle database found.
            disable_signals:
                If True, the nodes of the flow are not connected by signals.
                This option is usually used when we want to read a flow 
                in read-only mode and we want to avoid any possible side effect.
        """
        if os.path.isdir(filepath):
            # Walk through each directory inside path and find the pickle database.
            for dirpath, dirnames, filenames in os.walk(filepath):
                fnames = [f for f in filenames if f == cls.PICKLE_FNAME]
                if fnames:
                    assert len(fnames) == 1
                    filepath = os.path.join(dirpath, fnames[0])
                    break
            else:
                err_msg = "Cannot find %s inside directory %s" % (cls.PICKLE_FNAME, filepath)
                raise ValueError(err_msg)

        #with FileLock(filepath) as lock:
        with open(filepath, "rb") as fh:
            flow = pickle.load(fh)

        # Check if versions match.
        if flow.VERSION != cls.VERSION:
            msg = ("File flow version %s != latest version %s\n."
                   "Regerate the flow to solve the problem " % (flow.VERSION, cls.VERSION))
            warnings.warn(msg)

        if not disable_signals:
            flow.connect_signals()

        # Recompute the status of each task since tasks that
        # have been submitted previously might be completed.
        flow.check_status()
        return flow

    def __len__(self):
        return len(self.works)

    def __iter__(self):
        return self.works.__iter__()

    def __getitem__(self, slice):
        return self.works[slice]

    @property
    def works(self):
        """List of `Workflow` objects contained in self.."""
        return self._works

    @property
    def all_ok(self):
        """True if all the tasks in workflows have reached S_OK."""
        return all(work.all_ok for work in self)

    @property
    def all_tasks(self):
        return self.iflat_tasks()

    @property
    def num_tasks(self):
        """Total number of tasks"""
        return len(list(self.iflat_tasks()))

    @property
    def errored_tasks(self):
        """List of errored tasks."""
        return list(self.iflat_tasks(status=self.S_ERROR))

    @property
    def num_errored_tasks(self):
        """The number of tasks whose status is `S_ERROR`."""
        return len(self.errored_tasks)

    @property
    def unconverged_tasks(self):
        """List of unconverged tasks."""
        return list(self.iflat_tasks(status=self.S_UNCONVERGED))

    @property
    def num_unconverged_tasks(self):
        """The number of tasks whose status is `S_UNCONVERGED`."""
        return len(self.unconverged_tasks)

    @property
    def status_counter(self):
        """
        Returns a `Counter` object that counts the number of tasks with 
        given status (use the string representation of the status as key).
        """
        # Count the number of tasks with given status in each workflow.
        counter = self[0].status_counter
        for work in self[1:]:
            counter += work.status_counter

        return counter

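    # Illustrative sketch (not part of the original code): the Counter returned by
    # status_counter uses the string representation of each status as key, so a
    # quick summary of the flow can be printed with:
    #
    #     for status, count in flow.status_counter.items():
    #         print("%s: %d task(s)" % (status, count))
    #
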
    @property
    def ncpus_reserved(self):
        """
        Returns the number of CPUs reserved at this moment.
        A CPU is reserved if the task is not running but
        we have already submitted the task to the queue manager.
        """
        return sum(work.ncpus_reverved for work in self)

    @property
    def ncpus_allocated(self):
        """
        Returns the number of CPUs allocated at this moment.
        A CPU is allocated if it's running a task or if we have
        submitted a task to the queue manager but the job is still pending.
        """
        return sum(work.ncpus_allocated for work in self)

    @property
    def ncpus_inuse(self):
        """
        Returns the number of CPUs used at this moment.
        A CPU is used if there's a job that is running on it.
        """
        return sum(work.ncpus_inuse for work in self)

    @property
    def has_chrooted(self):
        """
        Returns a string that evaluates to True if we have changed
        the workdir for visualization purposes, e.g. when sshfs is used
        to mount the remote directory where the `Flow` is located.
        The string gives the previous workdir of the flow.
        """
        try:
            return self._chrooted_from

        except AttributeError:
            return ""

    def chroot(self, new_workdir):
        """
        Change the workdir of the `Flow`. Mainly used to allow the user
        to open the GUI on the local host and access the flow
        located on a remote machine via sshfs.

        .. note::
            Calling this method will make the flow go in read-only mode.
        """
        self._chrooted_from = self.workdir
        self.set_workdir(new_workdir, chroot=True)

        for i, work in enumerate(self):
            new_wdir = os.path.join(self.workdir, "work_" + str(i))
            work.chroot(new_wdir)

    def groupby_status(self):
        """
        Returns an ordered dictionary mapping the task status to
        the list of named tuples (task, work_index, task_index).
        """
        Entry = collections.namedtuple("Entry", "task wi ti")
        d = collections.defaultdict(list)

        for task, wi, ti in self.iflat_tasks_wti():
            d[task.status].append(Entry(task, wi, ti))

        # Sort keys according to their status.
        return collections.OrderedDict([(k, d[k]) for k in sorted(list(d.keys()))])

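    # Illustrative sketch: groupby_status makes it easy to report which
    # (work_index, task_index) pairs are in a given state, e.g.:
    #
    #     for status, entries in flow.groupby_status().items():
    #         print(status, "->", [(e.wi, e.ti) for e in entries])
    #
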
    def iflat_tasks_wti(self, status=None, op="=="):
        """
        Generator to iterate over all the tasks of the `Flow`.
        Yields:

            (task, work_index, task_index)

        If status is not None, only the tasks whose status satisfies
        the condition (task.status op status) are selected.
        status can be either one of the flags defined in the `Task` class
        (e.g. Task.S_OK) or a string such as "S_OK".
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=True)

    def iflat_tasks(self, status=None, op="=="):
        """
        Generator to iterate over all the tasks of the `Flow`.

        If status is not None, only the tasks whose status satisfies
        the condition (task.status op status) are selected.
        status can be either one of the flags defined in the `Task` class
        (e.g. Task.S_OK) or a string such as "S_OK".
        """
        return self._iflat_tasks_wti(status=status, op=op, with_wti=False)

    def _iflat_tasks_wti(self, status=None, op="==", with_wti=True):
        """
        Generator that produces a flat sequence of tasks.
        If status is not None, only the tasks with the specified status are selected.

        Yields:
            (task, work_index, task_index) if with_wti is True else task
        """
        if status is None:
            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if with_wti:
                        yield task, wi, ti
                    else:
                        yield task

        else:
            # Get the operator from the string.
            import operator
            op = {
                "==": operator.eq,
                "!=": operator.ne,
                ">": operator.gt,
                ">=": operator.ge,
                "<": operator.lt,
                "<=": operator.le,
            }[op]

            # Accept Task.S_FLAG or string.
            if is_string(status):
                status = getattr(Task, status)

            for wi, work in enumerate(self):
                for ti, task in enumerate(work):
                    if op(task.status, status):
                        if with_wti:
                            yield task, wi, ti
                        else:
                            yield task

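    # Illustrative sketch of status-based selection (not part of the original code).
    # status accepts either a Task flag or its string name, and op is one of
    # "==", "!=", ">", ">=", "<", "<=":
    #
    #     # All the tasks that have not reached S_OK yet.
    #     pending = list(flow.iflat_tasks(status="S_OK", op="!="))
    #
    #     # Errored tasks together with their (work_index, task_index).
    #     for task, wi, ti in flow.iflat_tasks_wti(status="S_ERROR"):
    #         print("work %d, task %d: %s" % (wi, ti, task))
    #
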
    def check_dependencies(self):
        """Test the dependencies of the nodes for possible deadlocks."""
        deadlocks = []

        for task in self.all_tasks:
            for dep in task.deps:
                if dep.node.depends_on(task):
                    deadlocks.append((task, dep.node))

        if deadlocks:
           lines = ["Detect wrong list of dependecies that will lead to a deadlock:"]
           lines.extend(["%s <--> %s" % nodes for nodes in deadlocks])
           raise ValueError("\n".join(lines))

    #def detect_deadlock(self):
    #    eu_tasks = list(self.errored_tasks) + list(self.unconverged_tasks)
    #     if not eu_tasks:
    #        return []

    #    deadlocked = []
    #    for task in self.all_tasks:
    #        if any(task.depends_on(eu_task) for eu_task in eu_tasks):
    #            deadlocked.append(task)

    #    return deadlocked

    def check_status(self):
        """Check the status of the workflows in self."""
        for work in self:
            work.check_status()

    def show_status(self, stream=sys.stdout):
        """
        Report the status of the workflows and the status 
        of the different tasks on the specified stream.
        """
        for i, work in enumerate(self):
            print(80*"=")
            print("Workflow #%d: %s, Finalized=%s\n" % (i, work, work.finalized) )

            table = [["Task", "Status", "Queue_id", 
                      "Errors", "Warnings", "Comments", 
                      "MPI", "OMP", 
                      "num_restarts", "Task Class"
                     ]]

            for task in work:
                task_name = os.path.basename(task.name)

                # Parse the events in the main output.
                report = task.get_event_report()

                events = map(str, 3*["N/A"])
                if report is not None: 
                    events = map(str, [report.num_errors, report.num_warnings, report.num_comments])

                cpu_info = map(str, [task.mpi_ncpus, task.omp_ncpus])
                task_info = map(str, [task.num_restarts, task.__class__.__name__])

                table.append(
                    [task_name, str(task.status), str(task.queue_id)] + 
                    events + 
                    cpu_info + 
                    task_info
                    )

            pprint_table(table, out=stream)

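    # Illustrative sketch: show_status writes to sys.stdout by default, but any
    # writable stream can be passed, e.g. to capture the report in a string:
    #
    #     from io import StringIO
    #     buf = StringIO()
    #     flow.show_status(stream=buf)
    #     report_text = buf.getvalue()
    #
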
    def open_files(self, what="o", wti=None, status=None, op="==", editor=None):
        """
        Open the files of the flow inside an editor (command line interface).

        Args:
            what:
                string with the list of characters selecting the file type
                Possible choices:
                    i ==> input_file,
                    o ==> output_file,
                    f ==> files_file,
                    j ==> job_file,
                    l ==> log_file,
                    e ==> stderr_file,
                    q ==> qerr_file,
            wti:
                Tuple (work_index, task_index) selecting a single task,
                or a string in the form w_start:w_stop,task_start:task_stop.
            status:
                If not None, only the tasks with this status are selected.
            op:
                status operator. Requires status. A task is selected 
                if task.status op status evaluates to true.
            editor:
                Select the editor. None to use the default editor ($EDITOR shell env var)
        """
        #TODO: Add support for wti
        if wti is not None:
            raise NotImplementedError("wti option is not avaiable!")

        def get_files(task, wi, ti):
            """Helper function used to select the files of a task."""
            choices = {
                "i": task.input_file,
                "o": task.output_file,
                "f": task.files_file,
                "j": task.job_file,
                "l": task.log_file,
                "e": task.stderr_file,
                "q": task.qerr_file,
                #"q": task.qout_file,
            }

            selected = []
            for c in what:
                try:
                    selected.append(getattr(choices[c], "path"))
                except KeyError:
                    import warnings
                    warnings.warn("Wrong keywork %s" % c)
            return selected

        # Build list of files to analyze.
        files = []
        for (task, wi, ti) in self.iflat_tasks_wti(status=status, op=op):
            lst = get_files(task, wi, ti)
            if lst: files.extend(lst)

        #print(files)
        return Editor(editor=editor).edit_files(files)

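    # Illustrative sketch: open the log and stderr files of all the errored tasks
    # in the default $EDITOR (the characters in `what` select the file types):
    #
    #     flow.open_files(what="le", status="S_ERROR")
    #
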
    def cancel(self):
        """
        Cancel all the tasks that are in the queue.

        Returns:
            Number of jobs cancelled, negative value if error
        """
        if self.has_chrooted:
            # TODO: Use paramiko to kill the job?
            warnings.warn("Cannot cancel the flow via sshfs!")
            return -1

        # If we are running with the scheduler, we must send a SIGKILL signal.
        pid_file = os.path.join(self.workdir, "_PyFlowScheduler.pid")
        if os.path.exists(pid_file):
            with open(pid_file, "r") as fh:
                pid = int(fh.readline())
                
            retcode = os.system("kill -9 %d" % pid)
            print("Sent SIGKILL to the scheduler, retcode = %s" % retcode)
            try:
                os.remove(pid_file)
            except IOError:
                pass

        num_cancelled = 0
        for task in self.iflat_tasks():
            num_cancelled += task.cancel()

        return num_cancelled

    def build(self, *args, **kwargs):
        """Make directories and files of the `Flow`."""
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        for work in self:
            work.build(*args, **kwargs)

    def build_and_pickle_dump(self):
        """
        Build dirs and files of the `Flow` and save the object in pickle format.

        Returns:
            0 if success
        """
        self.build()
        return self.pickle_dump()

    def pickle_dump(self):
        """
        Save the status of the object in pickle format.

        Returns:
            0 if success
        """
        if self.has_chrooted:
            warnings.warn("Cannot pickle_dump since we have chrooted from %s" % self.has_chrooted)
            return -1

        protocol = self.pickle_protocol
        filepath = os.path.join(self.workdir, self.PICKLE_FNAME)

        with FileLock(filepath) as lock:
            with open(filepath, mode="w" if protocol == 0 else "wb") as fh:
                pickle.dump(self, fh, protocol=protocol)

        # Atomic transaction.
        #filepath_new = filepath + ".new"
        #filepath_save = filepath + ".save"
        #shutil.copyfile(filepath, filepath_save)

        #try:
        #    with open(filepath_new, mode="w" if protocol == 0 else "wb") as fh:
        #        pickle.dump(self, fh, protocol=protocol)

        #    os.rename(filepath_new, filepath)
        #except IOError:
        #    os.rename(filepath_save, filepath)
        #finally:
        #    try:
        #        os.remove(filepath_save)
        #    except:
        #        pass
        #    try:
        #        os.remove(filepath_new)
        #    except:
        #        pass
        return 0

    def register_task(self, input, deps=None, manager=None, task_class=None):
        """
        Utility function that generates a `Workflow` made of a single task

        Args:
            input:
                Abinit input file, `Strategy` object, or `Task` object.
            deps:
                List of `Dependency` objects specifying the dependencies of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            task_class:
                Task subclass to instantiate. Default: `AbinitTask` 

        Returns:   
            The generated `Task`.
        """
        work = Workflow(manager=manager)
        task = work.register(input, deps=deps, task_class=task_class)
        self.register_work(work)

        return task

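    # Illustrative sketch (hypothetical input objects `scf_input`, `nscf_input`):
    # registering two single-task workflows, the second depending on the WFK file
    # produced by the first (assuming deps accepts the same {node: extension}
    # mapping used by register_work below):
    #
    #     scf_task = flow.register_task(scf_input)
    #     nscf_task = flow.register_task(nscf_input, deps={scf_task: "WFK"})
    #
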
    def register_work(self, work, deps=None, manager=None, workdir=None):
        """
        Register a new `Workflow` and add it to the internal list, 
        taking into account possible dependencies.

        Args:
            work:
                `Workflow` object.
            deps:
                List of `Dependency` objects specifying the dependencies of this node.
                An empty list of deps implies that this node has no dependencies.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the workflow.
            workdir:
                The name of the directory used for the `Workflow`.

        Returns:   
            The registered `Workflow`.
        """
        # Directory of the workflow.
        if workdir is None:
            work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))
        else:
            work_workdir = os.path.join(self.workdir, os.path.basename(workdir))

        work.set_workdir(work_workdir)

        if manager is not None:
            work.set_manager(manager)

        self.works.append(work)

        if deps:
            deps = [Dependency(node, exts) for node, exts in deps.items()]
            work.add_deps(deps)

        return work

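    # Illustrative sketch (hypothetical `dfpt_work` and `scf_task` objects):
    # registering a whole `Workflow` whose tasks depend on the WFK file produced
    # by a previously registered task:
    #
    #     flow.register_work(dfpt_work, deps={scf_task: "WFK"})
    #
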
    def register_cbk(self, cbk, cbk_data, deps, work_class, manager=None):
        """
        Registers a callback function that will generate the `Task` of the `Workflow`.

        Args:
            cbk:
                Callback function.
            cbk_data:
                Additional data passed to the callback function.
            deps:
                List of `Dependency` objects specifying the dependency of the workflow.
            work_class:
                `Workflow` class to instantiate.
            manager:
                The `TaskManager` responsible for the submission of the task. 
                If manager is None, we use the `TaskManager` specified during the creation of the `Flow`.
                                                                                                            
        Returns:   
            The `Workflow` that will be finalized by the callback.
        """
        # TODO: pass a workflow factory instead of a class
        # Directory of the workflow.
        work_workdir = os.path.join(self.workdir, "work_" + str(len(self)))

        # Create an empty workflow and register the callback
        work = work_class(workdir=work_workdir, manager=manager)
        
        self._works.append(work)
                                                                                                            
        deps = [Dependency(node, exts) for node, exts in deps.items()]
        if not deps:
            raise ValueError("A callback must have deps!")

        work.add_deps(deps)

        # Wrap the callable in a Callback object and save 
        # useful info such as the index of the workflow and the callback data.
        cbk = Callback(cbk, work, deps=deps, cbk_data=cbk_data)
                                                                                                            
        self._callbacks.append(cbk)
                                                                                                            
        return work

    def allocate(self, manager=None):
        """
        Allocate the `AbinitFlow` i.e. assign the `workdir` and (optionally) 
        the `TaskManager` to the different tasks in the Flow.
        """
        for work in self:
            work.allocate(manager=self.manager)
            work.set_flow(self)

        for task in self.iflat_tasks():
            task.set_flow(self)

        self.check_dependencies()
        return self

    def show_dependencies(self):
        for work in self:
            work.show_intrawork_deps()

    def on_dep_ok(self, signal, sender):
        # TODO
        # Replace this callback with dynamic dispatch
        # on_all_S_OK for workflow
        # on_S_OK for task
        print("on_dep_ok with sender %s, signal %s" % (str(sender), signal))

        for i, cbk in enumerate(self._callbacks):

            if not cbk.handle_sender(sender):
                print("Do not handle")
                continue

            if not cbk.can_execute():
                print("cannot execute")
                continue 

            # Execute the callback to generate the workflow.
            print("about to build new workflow")
            #empty_work = self._works[cbk.w_idx]

            # TODO better treatment of ids
            # Make sure the new workflow has the same id as the previous one.
            #new_work_idx = cbk.w_idx
            work = cbk(flow=self)
            work.add_deps(cbk.deps)

            # Disable the callback.
            cbk.disable()

            # Update the database.
            self.pickle_dump()

    #def finalize(self):
    #    """This method is called when the flow is completed."""

    def connect_signals(self):
        """
        Connect the signals within the workflow.
        self is responsible for catching the important signals raised by
        its tasks and for raising new signals when some particular condition occurs.
        """
        # Connect the signals inside each Workflow.
        for work in self:
            work.connect_signals()

        # Observe the nodes that must reach S_OK in order to call the callbacks.
        for cbk in self._callbacks:
            for dep in cbk.deps:
                print("connecting %s \nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
                dispatcher.connect(self.on_dep_ok, signal=dep.node.S_OK, sender=dep.node, weak=False)

        # Associate to each signal the callback _on_signal
        # (bound method of the node that will be called by `AbinitFlow`).
        # Each node will set its attribute _done_signal to True to tell
        # the flow that this callback should be disabled.

        # Register the callbacks for the Workflows.
        #for work in self:
        #    slot = self._sig_slots[work]
        #    for signal in S_ALL:
        #        done_signal = getattr(work, "_done_ " + signal, False)
        #        if not done_sig:
        #            cbk_name = "_on_" + str(signal)
        #            cbk = getattr(work, cbk_name, None)
        #            if cbk is None: continue
        #            slot[work][signal].append(cbk)
        #            print("connecting %s\nwith sender %s, signal %s" % (str(cbk), dep.node, dep.node.S_OK))
        #            dispatcher.connect(self.on_dep_ok, signal=signal, sender=dep.node, weak=False)

        # Register the callbacks for the Tasks.

        #self.show_receivers()

    def show_receivers(self, sender=dispatcher.Any, signal=dispatcher.Any):
        print("*** live receivers ***")
        for rec in dispatcher.liveReceivers(dispatcher.getReceivers(sender, signal)):
            print("receiver -->", rec)
        print("*** end live receivers ***")
Example #15
0
class AbiFireTask(FireTaskBase):

    # List of `AbinitEvent` subclasses that are tested in the check_status method.
    # Subclasses should provide their own list if they need to check the convergence status.
    CRITICAL_EVENTS = []

    S_INIT = Status.from_string("Initialized")
    S_LOCKED = Status.from_string("Locked")
    S_READY = Status.from_string("Ready")
    S_SUB = Status.from_string("Submitted")
    S_RUN = Status.from_string("Running")
    S_DONE = Status.from_string("Done")
    S_ABICRITICAL = Status.from_string("AbiCritical")
    S_QCRITICAL = Status.from_string("QCritical")
    S_UNCONVERGED = Status.from_string("Unconverged")
    S_ERROR = Status.from_string("Error")
    S_OK = Status.from_string("Completed")

    ALL_STATUS = [
        S_INIT,
        S_LOCKED,
        S_READY,
        S_SUB,
        S_RUN,
        S_DONE,
        S_ABICRITICAL,
        S_QCRITICAL,
        S_UNCONVERGED,
        S_ERROR,
        S_OK,
    ]

    def __init__(self, abiinput):
        """
        Basic __init__; subclasses are supposed to define the same input parameters,
        add their own, and call super for the basic ones. The input parameters should be
        stored as attributes of the instance for serialization and for inspection.
        """
        self.abiinput = abiinput

    @serialize_fw
    def to_dict(self):
        d = {}
        # Use getfullargspec: inspect.getargspec was removed in Python 3.11.
        for arg in inspect.getfullargspec(self.__init__).args:
            if arg != "self":
                val = self.__getattribute__(arg)
                if hasattr(val, "as_dict"):
                    val = val.as_dict()
                elif isinstance(val, (tuple, list)):
                    val = [v.as_dict() if hasattr(v, "as_dict") else v for v in val]
                d[arg] = val

        return d

    @classmethod
    def from_dict(cls, d):
        dec = MontyDecoder()
        kwargs = {k: dec.process_decoded(v) for k, v in d.items() if k in inspect.getfullargspec(cls.__init__).args}
        return cls(**kwargs)

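    # Illustrative sketch: to_dict/from_dict give a FireWorks-style round trip driven
    # by the __init__ signature (assuming `abiinput` implements as_dict and is
    # reconstructable by MontyDecoder):
    #
    #     d = task.to_dict()
    #     same_task = AbiFireTask.from_dict(d)
    #
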
    # from Task
    def set_workdir(self, workdir):
        """Set the working directory."""

        self.workdir = os.path.abspath(workdir)

        # Files required for the execution.
        self.input_file = File(os.path.join(self.workdir, "run.abi"))
        self.output_file = File(os.path.join(self.workdir, "run.abo"))
        self.files_file = File(os.path.join(self.workdir, "run.files"))
        self.log_file = File(os.path.join(self.workdir, "run.log"))
        self.stderr_file = File(os.path.join(self.workdir, "run.err"))

        # Directories with input|output|temporary data.
        self.indir = Directory(os.path.join(self.workdir, "indata"))
        self.outdir = Directory(os.path.join(self.workdir, "outdata"))
        self.tmpdir = Directory(os.path.join(self.workdir, "tmpdata"))

    # from Task
    def build(self):
        """
        Creates the working directory and the input files of the :class:`Task`.
        It does not overwrite files if they already exist.
        """
        # Create dirs for input, output and tmp data.
        self.indir.makedirs()
        self.outdir.makedirs()
        self.tmpdir.makedirs()

        # Write files file and input file.
        if not self.files_file.exists:
            self.files_file.write(self.filesfile_string)

        self.input_file.write(str(self.abiinput))

    # from Task
    # Prefixes for Abinit (input, output, temporary) files.
    Prefix = collections.namedtuple("Prefix", "idata odata tdata")
    pj = os.path.join

    prefix = Prefix(pj("indata", "in"), pj("outdata", "out"), pj("tmpdata", "tmp"))
    del Prefix, pj

    # from AbintTask
    @property
    def filesfile_string(self):
        """String with the list of files and prefixes needed to execute ABINIT."""
        lines = []
        app = lines.append
        pj = os.path.join

        app(self.input_file.path)  # Path to the input file
        app(self.output_file.path)  # Path to the output file
        app(pj(self.workdir, self.prefix.idata))  # Prefix for input data
        app(pj(self.workdir, self.prefix.odata))  # Prefix for output data
        app(pj(self.workdir, self.prefix.tdata))  # Prefix for temporary data

        # Paths to the pseudopotential files.
        # Note that here the pseudos **must** be sorted according to znucl.
        # Here we reorder the pseudos if the order is wrong.
        ord_pseudos = []
        znucl = self.abiinput.structure.to_abivars()["znucl"]

        for z in znucl:
            for p in self.abiinput.pseudos:
                if p.Z == z:
                    ord_pseudos.append(p)
                    break
            else:
                raise ValueError("Cannot find pseudo with znucl %s in pseudos:\n%s" % (z, self.pseudos))

        for pseudo in ord_pseudos:
            app(pseudo.path)

        return "\n".join(lines)

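    # For reference, the files file generated above follows the standard ABINIT
    # layout: input, output, the three prefixes, then the pseudopotential paths
    # (paths below are only indicative):
    #
    #     /path/to/run.abi
    #     /path/to/run.abo
    #     /path/to/indata/in
    #     /path/to/outdata/out
    #     /path/to/tmpdata/tmp
    #     /path/to/pseudo_1
    #     /path/to/pseudo_2
    #
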
    def run_abinit(self, fw_spec):
        """Run ABINIT via mpirun, redirecting stdin, stdout and stderr to the task files."""
        with open(self.files_file.path, "r") as stdin, open(self.log_file.path, "w") as stdout, open(
            self.stderr_file.path, "w"
        ) as stderr:

            p = subprocess.Popen(["mpirun", "abinit"], stdin=stdin, stdout=stdout, stderr=stderr)

            # Wait for completion inside the with block so that the redirected
            # files are not closed while the process is still writing to them.
            p.communicate()

        self.returncode = p.returncode

    def get_event_report(self):
        """
        Analyzes the log file for possible Errors or Warnings.

        Returns:
            :class:`EventReport` instance or None if the log file does not exist.
        """

        if not self.log_file.exists:
            return None

        parser = events.EventsParser()
        try:
            report = parser.parse(self.log_file.path)
            return report

        except parser.Error as exc:
            # Return a report with an error entry with info on the exception.
            logger.critical("%s: Exception while parsing ABINIT events:\n %s" % (self.log_file, str(exc)))
            return parser.report_exception(self.log_file.path, exc)

    def task_analysis(self, fw_spec):

        status, msg = self.check_final_status()

        if self.status != self.S_OK:
            raise AbinitRuntimeError(self)

        return FWAction(stored_data=dict(**self.report.as_dict()))

    def run_task(self, fw_spec):
        self.set_workdir(os.path.abspath("."))
        self.build()
        self.run_abinit(fw_spec)
        return self.task_analysis(fw_spec)

    def set_status(self, status, msg=None):
        self.status = status
        return status, msg

    def check_final_status(self):
        """
        This function checks the status of the task by inspecting the output and the
        error files produced by the application. Based on abipy task checkstatus().
        """
        # 2) see if an error occurred at starting the job
        # 3) see if there is output
        # 4) see if abinit reports problems
        # 5) see if err file exists and is empty
        # 9) the only way of landing here is if there is a output file but no err files...

        # 2) Check the returncode of the process (the process of submitting the job) first.
        if self.returncode != 0:
            # The job was not submitted properly
            return self.set_status(self.S_QCRITICAL, msg="return code %s" % self.returncode)

        # Analyze the stderr file for Fortran runtime errors.
        err_msg = None
        if self.stderr_file.exists:
            err_msg = self.stderr_file.read()

        # Start to check ABINIT status if the output file has been created.
        if self.output_file.exists:
            try:
                self.report = self.get_event_report()
            except Exception as exc:
                msg = "%s exception while parsing event_report:\n%s" % (self, exc)
                logger.critical(msg)
                return self.set_status(self.S_ABICRITICAL, msg=msg)

            if self.report.run_completed:

                # Check if the calculation converged.
                not_ok = self.report.filter_types(self.CRITICAL_EVENTS)
                if not_ok:
                    return self.set_status(self.S_UNCONVERGED)
                else:
                    return self.set_status(self.S_OK)

            # Calculation still running or errors?
            if self.report.errors or self.report.bugs:
                # Abinit reported problems
                if self.report.errors:
                    logger.debug("Found errors in report")
                    for error in self.report.errors:
                        logger.debug(str(error))
                        try:
                            self.abi_errors.append(error)
                        except AttributeError:
                            self.abi_errors = [error]

                # The job is unfixable due to ABINIT errors
                logger.debug("%s: Found Errors or Bugs in ABINIT main output!" % self)
                msg = "\n".join(map(repr, self.report.errors + self.report.bugs))
                return self.set_status(self.S_ABICRITICAL, msg=msg)

        # 9) if we still haven't returned there is no indication of any error and the job can only still be running
        # but we should actually never land here, or we have delays in the file system ....
        # print('the job still seems to be running maybe it is hanging without producing output... ')

        # Check the time of the last modification.
        # Note: this FireTask never sets self.manager, so guard the access to avoid
        # an AttributeError when no manager/policy is available.
        manager = getattr(self, "manager", None)
        if manager is not None and self.output_file.exists and (
            time.time() - self.output_file.get_stat().st_mtime > manager.policy.frozen_timeout
        ):
            msg = "Task seems to be frozen, last change more than %s [s] ago" % manager.policy.frozen_timeout
            return self.set_status(self.S_ERROR, msg)

        return self.set_status(self.S_RUN)

    # from GsTask
    @property
    def gsr_path(self):
        """Absolute path of the GSR file. Empty string if file is not present."""
        # Lazy property to avoid multiple calls to has_abiext.
        try:
            return self._gsr_path
        except AttributeError:
            path = self.outdir.has_abiext("GSR")
            if path:
                self._gsr_path = path
            return path

    def open_gsr(self):
        """
        Open the GSR file located in self.outdir.
        Returns :class:`GsrFile` object, None if file could not be found or file is not readable.
        """
        gsr_path = self.gsr_path
        if not gsr_path:
            if self.status == self.S_OK:
                logger.critical("%s reached S_OK but didn't produce a GSR file in %s" % (self, self.outdir))
            return None

        # Open the GSR file.
        from abipy.electrons.gsr import GsrFile

        try:
            return GsrFile(gsr_path)
        except Exception as exc:
            logger.critical("Exception while reading GSR file at %s:\n%s" % (gsr_path, str(exc)))
            return None
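
# Illustrative sketch of how this FireTask could be wrapped in a FireWorks workflow
# (`scf_input` is hypothetical; the Firework/LaunchPad usage is an assumption, not
# part of the snippet above):
#
#     from fireworks import Firework, LaunchPad
#
#     task = AbiFireTask(scf_input)
#     fw = Firework(task)
#     LaunchPad.auto_load().add_wf(fw)
#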