Пример #1
0
    def to_string(self, verbose=0):
        """
        Build a readable report: the input text plus a summary of its structure(s).

        Args:
            verbose: Verbosity level. Not used by this method.

        Returns:
            A single string whose sections are separated by newlines.
        """
        banner = "=" * 10 + " Input File " + "=" * 10
        out = [banner, self.string, "=" * len(banner) + "\n"]

        # A global structure is available: its summary is all that is left to add.
        if self.structure is not None:
            out.append(self.structure.spget_summary())
            return "\n".join(out)

        # No global structure: describe the structure of every dataset.
        structures = [dataset.structure for dataset in self.datasets]
        out.append("Input file contains %d structures:" % len(structures))
        for number, structure in enumerate(structures, start=1):
            out.extend((boxed("Dataset: %d" % number), structure.spget_summary(), ""))

        # Rows of the tables are labelled 1..ndtset.
        dfs = dataframes_from_structures(structures, index=list(range(1, self.ndtset + 1)))
        out.extend((
            boxed("Tabular view (each row corresponds to a dataset structure)"),
            "",
            "Lattice parameters:",
            str(dfs.lattice),
            "",
            "Atomic positions:",
            str(dfs.coords),
        ))

        return "\n".join(out)
Пример #2
0
    def to_string(self, verbose=0):
        """
        Return the input text framed by a header, followed by structural information.

        When ``self.structure`` is None, the structures of all the datasets are
        summarized one by one and then shown in tabular form.

        Args:
            verbose: Verbosity level. Not used by this method.

        Returns:
            The report as one newline-joined string.
        """
        fence = 10 * "="
        top_rule = fence + " Input File " + fence
        chunks = [top_rule]
        chunks.append(self.string)
        chunks.append(len(top_rule) * "=" + "\n")

        if self.structure is None:
            # One structure per dataset.
            all_structures = [dataset.structure for dataset in self.datasets]
            chunks.append("Input file contains %d structures:" % len(all_structures))
            for position, dataset_structure in enumerate(all_structures):
                chunks += [
                    boxed("Dataset: %d" % (position + 1)),
                    dataset_structure.spget_summary(),
                    "",
                ]

            # Datasets are numbered starting from 1 in the tables.
            row_labels = [number for number in range(1, self.ndtset + 1)]
            tables = dataframes_from_structures(all_structures, index=row_labels)
            chunks += [
                boxed("Tabular view (each row corresponds to a dataset structure)"),
                "",
                "Lattice parameters:",
                str(tables.lattice),
                "",
                "Atomic positions:",
                str(tables.coords),
            ]
        else:
            # Same structure for the whole file.
            chunks.append(self.structure.spget_summary())

        return "\n".join(chunks)
Пример #3
0
 def exit_now():
     """
     Tell the caller's loop whether it has to stop.

     Returns:
         0 to keep going; a non-zero value to exit. -1 means the flow
         reports all_ok, 1 means at least one task has a critical status.
     """
     if flow.all_ok:
         cprint("Flow reached all_ok", "green")
         return -1

     found_critical = any(st.is_critical for st in before_task2stat.values())
     if not found_critical:
         return 0

     cprint(boxed("Found tasks with critical status"), "red")
     return 1
Пример #4
0
    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if all_ok:
            return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_lines = []

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                  (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_lines.append(msg)

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s)

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_lines.append(boxed(msg))

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_lines.append(boxed(msg))

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_lines.append(boxed(msg))

        # Test on the presence of deadlocks.
        g = self.flow.find_deadlocks()
        if g.deadlocked:
            # Check the flow again so that status are updated.
            self.flow.check_status()

            g = self.flow.find_deadlocks()
            print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running)
            if g.deadlocked and not g.runnables and not g.running:
                err_lines.append("No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked))

        if not g.runnables and not g.running:
            # Check the flow again so that status are updated.
            self.flow.check_status()
            g = self.flow.find_deadlocks()
            if not g.runnables and not g.running:
                err_lines.append("No task is running and cannot find other tasks to submit.")

        # Something wrong. Quit
        if err_lines:
            # Cancel all jobs.
            if self.killjobs_if_errors:
                cprint("killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow")
                try:
                    num_cancelled = 0
                    for task in self.flow.iflat_tasks():
                        num_cancelled += task.cancel()
                    cprint("Killed %d tasks" % num_cancelled, "yellow")
                except Exception as exc:
                    cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red")

            self.shutdown("\n".join(err_lines))

        return len(self.exceptions)
Пример #5
0
    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                  (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_msg += "\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons 
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_msg += boxed(msg)

        deadlocked, runnables, running = self.flow.deadlocked_runnables_running()
        #print("\ndeadlocked:\n", deadlocked, "\nrunnables:\n", runnables, "\nrunning\n", running)
        if deadlocked and not runnables and not running:
            msg = "No runnable job with deadlocked tasks:\n %s\nWill shutdown the scheduler and exit" % str(deadlocked)
            err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)
Пример #6
0
    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" %
                  get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if all_ok:
            return self.shutdown(
                msg=
                "All tasks have reached S_OK. Will shutdown the scheduler and exit"
            )

        # Handle failures.
        err_lines = []

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = (
                "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s "
                % (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += (
                    "\nThe scheduler tried to send an e-mail to remind the user\n"
                    + " but send_email returned %d. Aborting now" % retcode)
                err_lines.append(msg)

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s)

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_lines.append(boxed(msg))

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_lines.append(boxed(msg))

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_lines.append(boxed(msg))

        # Test on the presence of deadlocks.
        g = self.flow.find_deadlocks()
        if g.deadlocked:
            # Check the flow again so that status are updated.
            self.flow.check_status()

            g = self.flow.find_deadlocks()
            print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables,
                  "\nrunning\n", g.running)
            if g.deadlocked and not g.runnables and not g.running:
                err_lines.append(
                    "No runnable job with deadlocked tasks:\n%s." %
                    str(g.deadlocked))

        if not g.runnables and not g.running:
            # Check the flow again so that status are updated.
            self.flow.check_status()
            g = self.flow.find_deadlocks()
            if not g.runnables and not g.running:
                err_lines.append(
                    "No task is running and cannot find other tasks to submit."
                )

        # Something wrong. Quit
        if err_lines:
            # Cancel all jobs.
            if self.killjobs_if_errors:
                cprint(
                    "killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.",
                    "yellow")
                try:
                    num_cancelled = 0
                    for task in self.flow.iflat_tasks():
                        num_cancelled += task.cancel()
                    cprint("Killed %d tasks" % num_cancelled, "yellow")
                except Exception as exc:
                    cprint(
                        "Exception while trying to kill jobs:\n%s" % str(exc),
                        "red")

            self.shutdown("\n".join(err_lines))

        return len(self.exceptions)
Пример #7
0
    def _callback(self):
        """The actual callback."""
        if self.DEBUG:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())
        #print('before _runem_all in _callback')

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                  (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        #if delta_etime.total_seconds() > self.MAX_ETIME_S:
        #    err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.MAX_NUM_PYEXCS:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.MAX_NUM_PYEXCS)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons 
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_UNCONVERGED.
        #if self.flow.num_unconverged_tasks:
        #    # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet
        #    msg = ("Found %d unconverged tasks."
        #           "Automatic restarting is not available yet. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        #deadlocks = self.detect_deadlocks()
        #if deadlocks:
        #    msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)
Пример #8
0
    def _callback(self):
        """The actual callback."""
        if self.DEBUG:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" %
                  get_open_fds())
        #print('before _runem_all in _callback')

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            self.shutdown(
                msg=
                "All tasks have reached S_OK. Will shutdown the scheduler and exit"
            )

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S:
            self.num_reminders += 1
            msg = (
                "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s "
                % (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += (
                    "\nThe scheduler tried to send an e-mail to remind the user\n"
                    + " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        #if delta_etime.total_seconds() > self.MAX_ETIME_S:
        #    err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.MAX_NUM_PYEXCS:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.MAX_NUM_PYEXCS)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_UNCONVERGED.
        #if self.flow.num_unconverged_tasks:
        #    # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet
        #    msg = ("Found %d unconverged tasks."
        #           "Automatic restarting is not available yet. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        #deadlocks = self.detect_deadlocks()
        #if deadlocks:
        #    msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)