class NewLineTriggerTask(BaseTask):
    """ Triggers a callback function upon new lines being added to a file.

    This trigger task watches a specified file for new lines. After having
    aggregated a given number of new lines it calls the provided callback function
    with a list of the lines that were added.
    """
    def __init__(self,
                 name,
                 path,
                 callback,
                 aggregate=None,
                 use_existing=False,
                 flush_existing=True,
                 event_trigger_time=0.5,
                 stop_polling_rate=2,
                 *,
                 callback_init=None,
                 callback_finally=None,
                 queue=JobType.Task,
                 force_run=False,
                 propagate_skip=True):
        """ Initialize the new line trigger task.

        All task parameters except the name, callback, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            path: The path to the file that should be watched for new lines.
                  The path has to be an absolute path, otherwise an exception is thrown.
            callback (callable): A callable object that is called with the list of lines
                                 that have changed. The function definition is
                                 def callback(lines, data, store, signal, context).
            aggregate (int, None): The number of lines that are aggregated before the
                                   callback is called. Set to None or 1 to trigger
                                   on each new line event occurrence.
            use_existing (bool): Use the existing lines that are located in file for
                                 initialising the line list.
            flush_existing (bool): If 'use_existing' is True, then flush all existing
                                   lines without regard to the aggregation setting.
                                   I.e., all existing lines are sent to the callback.
            event_trigger_time (float, None): The waiting time in seconds before the
                                              file is polled again after no new line
                                              was found. Set to None to poll without
                                              waiting.
            stop_polling_rate (float): The number of events after which a signal is sent
                                       to the workflow to check whether the task
                                       should be stopped.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name,
                         queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        # set the task's parameters
        self.params = TaskParameters(
            path=path,
            aggregate=aggregate if aggregate is not None else 1,
            use_existing=use_existing,
            flush_existing=flush_existing,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
        )
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the NewLineTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object wrapping the unchanged task data.
        """
        params = self.params.eval(data, store)

        if not os.path.isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # the constructor only replaces a literal None; a callable 'aggregate' can
        # still evaluate to None (or 0), which is treated as one line per callback
        aggregate = params.aggregate if params.aggregate else 1

        def watch_file(file_pointer, task_signal):
            """ Yield lines as they get appended until the task is stopped. """
            while not task_signal.is_stopped:
                new = file_pointer.readline()
                if new:
                    yield new
                elif params.event_trigger_time is not None:
                    # nothing new yet: wait before polling the file again.
                    # A value of None disables the waiting time.
                    time.sleep(params.event_trigger_time)

        lines = []
        polling_event_number = 0

        # a single file handle is used for reading the existing lines and for
        # watching, so the watch continues exactly where the initial read ended
        with open(params.path, 'r') as file:
            if params.use_existing:
                # if requested, pre-fill the line list with the existing lines
                lines = file.readlines()
                if params.flush_existing and lines:
                    if self._callback is not None:
                        self._callback(lines, data, store, signal, context)

                    # start with a fresh list so the one handed to the callback
                    # is not modified behind its back
                    lines = []
            else:
                # skip the existing content and only report lines added from now on
                file.seek(0, 2)

            for line in watch_file(file, signal):
                lines.append(line)

                # check the stop signal every stop_polling_rate events
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # as soon as enough lines have been aggregated call the callback function
                if len(lines) >= aggregate:
                    chunks = len(lines) // aggregate
                    for _ in range(chunks):
                        if self._callback is not None:
                            self._callback(lines[0:aggregate], data,
                                           store, signal, context)

                        del lines[0:aggregate]

        return Action(data)
Пример #2
0
class GlobTask(BaseTask):
    """ Collects the files found below one or more paths using glob patterns. """
    def __init__(self,
                 name,
                 paths,
                 callback,
                 pattern='*',
                 recursive=False,
                 return_abs=True,
                 *,
                 queue=JobType.Task,
                 callback_init=None,
                 callback_finally=None,
                 force_run=False,
                 propagate_skip=True):
        """ Set up the glob task.

        With the exception of name, callback, queue, force_run and propagate_skip,
        every task parameter may be given either as its native type or as a callable
        that returns the native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A single path or a list of paths in which to
                                       look for files. All paths must be absolute,
                                       otherwise an exception is thrown. May be a
                                       string, a list of strings, or a callable
                                       returning either of them.
            callback (callable): A callable object that receives the result of the
                                 glob operation. The function definition is
                                 def callback(files, data, store, signal, context).
            pattern (str): The glob style pattern used for matching files.
            recursive (bool): Look for files recursively. Use ** to match any files
                              and zero or more directories and subdirectories.
                              Can be slow for a large number of files.
            return_abs (bool): If True the absolute paths are returned,
                               if False only the filenames.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data is the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data is the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(
            name,
            queue=queue,
            callback_init=callback_init,
            callback_finally=callback_finally,
            force_run=force_run,
            propagate_skip=propagate_skip,
        )

        self._callback = callback
        self.params = TaskParameters(
            paths=paths,
            pattern=pattern,
            recursive=recursive,
            return_abs=return_abs,
        )

    def run(self, data, store, signal, context, **kwargs):
        """ Execute the glob operation and hand the result to the callback.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If any of the specified paths is not absolute.

        Returns:
            Action: An Action object containing the data that should be passed on
                    to the next task and optionally a list of successor tasks that
                    should be executed.
        """
        params = self.params.eval(data, store)

        # a single path given as a string is treated like a one-element list
        if isinstance(params.paths, str):
            search_paths = [params.paths]
        else:
            search_paths = params.paths

        # every path is checked before the result is evaluated
        absolute_flags = [isabs(entry) for entry in search_paths]
        if not all(absolute_flags):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # collect the matches path by path, in the order the paths were given
        files = []
        for entry in search_paths:
            matches = glob(pjoin(entry, params.pattern),
                           recursive=params.recursive)
            for match in matches:
                if params.return_abs:
                    files.append(match)
                else:
                    files.append(basename(match))

        if self._callback is not None:
            self._callback(files, data, store, signal, context)

        return Action(data)
Пример #3
0
class BashTask(BaseTask):
    """ The Bash task executes a user-defined bash command or bash file.

    All task parameters except the name, callbacks, queue, force_run and
    propagate_skip can either be their native type or a callable returning
    the native type.

    Args:
        name (str): The name of the task.
        command (function, str): The command or bash file that should be executed.
        cwd (function, str, None): The working directory for the command.
        env (function, dict, None): A dictionary of environment variables.
        user (function, int, None): The user ID of the user with which the command
            should be executed.
        group (function, int, None): The group ID of the group with which the command
            should be executed.
        stdin (function, str, None): An input string that should be passed on to the
            process. After the string has been sent, the standard input of the
            process is closed.
        refresh_time (function, float): The time in seconds the internal output
            handling waits before checking for new output from the process.
        capture_stdout (function, bool): Set to ``True`` to capture all standard output
            in a temporary file.
        capture_stderr (function, bool): Set to ``True`` to capture all standard errors
            in a temporary file.
        callback_process (callable): A callable that is called after the process
            started. The definition is::

                (pid, data, store, signal, context) -> None

            with the parameters:

                - **pid** (*int*): The process PID.
                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.

        callback_end (callable): A callable that is called after the process
            completed. The definition is::

                (returncode, stdout_file, stderr_file,
                 data, store, signal, context) -> None

            with the parameters:

                - **returncode** (*int*): The return code of the process.
                - **stdout_file**: A file object with the standard output\
                    if the flag ``capture_stdout`` was set to ``True``,\
                    otherwise ``None``.
                - **stderr_file**: A file object with the error output\
                    if the flag ``capture_stderr`` was set to ``True``,\
                    otherwise ``None``.
                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.


        callback_stdout (callable): A callable that is called for every line of
            output the process sends to stdout. The definition is::

                (line, data, store, signal, context) -> None

            with the parameters:
                - **line** (*str*): Single line of the process output as a string.
                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.

        callback_stderr (callable): A callable that is called for every line of
            output the process sends to stderr. The definition is::

                (line, data, store, signal, context) -> None

            with the parameters:
                - **line** (*str*): Single line of the process output as a string.
                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.

        queue (str): Name of the queue the task should be scheduled to. Defaults to
            the general task queue.
        callback_init (callable): An optional callable that is called shortly
            before the task is run. The definition is::

                (data, store, signal, context) -> None

            with the parameters:

                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.

        callback_finally (callable): An optional callable that is always called
            at the end of a task, regardless whether it completed successfully,
            was stopped or was aborted. The definition is::

                (status, data, store, signal, context) -> None

            with the parameters:

                - **status** (*TaskStatus*): The current status of the task. It can\
                    be one of the following:

                        - ``TaskStatus.Success`` -- task was successful
                        - ``TaskStatus.Stopped`` -- task was stopped
                        - ``TaskStatus.Aborted`` -- task was aborted
                        - ``TaskStatus.Error`` -- task raised an exception

                - **data** (:class:`.MultiTaskData`): The data object that has been passed\
                    from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data store object\
                    that allows the task to store data for access across the current\
                    workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It wraps\
                    the construction and sending of signals into easy to use methods.
                - **context** (*TaskContext*): The context in which the tasks runs.

        force_run (bool): Run the task even if it is flagged to be skipped.
        propagate_skip (bool): Propagate the skip flag to the next task.
    """
    def __init__(self,
                 name,
                 command,
                 cwd=None,
                 env=None,
                 user=None,
                 group=None,
                 stdin=None,
                 refresh_time=0.1,
                 capture_stdout=False,
                 capture_stderr=False,
                 callback_process=None,
                 callback_end=None,
                 callback_stdout=None,
                 callback_stderr=None,
                 *,
                 queue=DefaultJobQueueName.Task,
                 callback_init=None,
                 callback_finally=None,
                 force_run=False,
                 propagate_skip=True):
        super().__init__(name,
                         queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        self.params = TaskParameters(command=command,
                                     cwd=cwd,
                                     env=env,
                                     user=user,
                                     group=group,
                                     stdin=stdin,
                                     refresh_time=refresh_time,
                                     capture_stdout=capture_stdout,
                                     capture_stderr=capture_stderr)

        self._callback_process = callback_process
        self._callback_end = callback_end
        self._callback_stdout = callback_stdout
        self._callback_stderr = callback_stderr

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the Bash task.

        Args:
            data (:class:`.MultiTaskData`): The data object that has been passed from the
                predecessor task.
            store (:class:`.DataStoreDocument`): The persistent data store object that allows the
                task to store data for access across the current workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Returns:
            Action (Action): An Action object containing the data that should be passed on
                to the next task and optionally a list of successor tasks that
                should be executed.
        """
        params = self.params.eval(data, store, exclude=['command'])

        # the output has to be piped if it is either captured or sent to a callback
        capture_stdout = self._callback_stdout is not None or params.capture_stdout
        capture_stderr = self._callback_stderr is not None or params.capture_stderr

        stdout_file = TemporaryFile() if params.capture_stdout else None
        stderr_file = TemporaryFile() if params.capture_stderr else None

        # the temporary files are closed in the finally clause, so they are released
        # even if starting the process or one of the callbacks raises an exception
        try:
            stdout = PIPE if capture_stdout else None
            stderr = PIPE if capture_stderr else None

            # change the user or group under which the process should run
            if params.user is not None or params.group is not None:
                pre_exec = self._run_as(params.user, params.group)
            else:
                pre_exec = None

            # call the command
            # NOTE(review): the command is executed through the shell (shell=True), so
            # it must never be built from untrusted input.
            proc = Popen(self.params.eval_single('command', data, store),
                         cwd=params.cwd,
                         shell=True,
                         env=params.env,
                         preexec_fn=pre_exec,
                         stdout=stdout,
                         stderr=stderr,
                         stdin=PIPE if params.stdin is not None else None)

            # if input is available, send it to the process
            if params.stdin is not None:
                # The pipe is buffered: without closing (and thereby flushing) it the
                # process might never receive the input nor the end-of-file marker.
                # A broken pipe only means the process exited without reading all of
                # its input, which is not treated as an error.
                try:
                    proc.stdin.write(
                        params.stdin.encode(sys.getfilesystemencoding()))
                except BrokenPipeError:
                    pass
                try:
                    proc.stdin.close()
                except BrokenPipeError:
                    pass

            # send a notification that the process has been started
            try:
                if self._callback_process is not None:
                    self._callback_process(proc.pid, data, store, signal, context)
            except (StopTask, AbortWorkflow):
                proc.terminate()
                raise

            # send the output handling to a thread
            if capture_stdout or capture_stderr:
                output_reader = BashTaskOutputReader(proc, stdout_file,
                                                     stderr_file,
                                                     self._callback_stdout,
                                                     self._callback_stderr,
                                                     params.refresh_time, data,
                                                     store, signal, context)
                output_reader.start()
            else:
                output_reader = None

            # wait for the process to complete and watch for a stop signal
            while proc.poll() is None or\
                    (output_reader is not None and output_reader.is_alive()):
                sleep(params.refresh_time)
                if signal.is_stopped:
                    proc.terminate()

            if output_reader is not None:
                output_reader.join()
                data = output_reader.data

                # if a stop or abort exception was raised, stop the bash process
                # and re-raise
                if output_reader.exc_obj is not None:
                    if proc.poll() is None:
                        proc.terminate()
                    raise output_reader.exc_obj

            # send a notification that the process has completed
            if self._callback_end is not None:
                # rewind the captured output so the callback reads it from the start
                if stdout_file is not None:
                    stdout_file.seek(0)
                if stderr_file is not None:
                    stderr_file.seek(0)

                self._callback_end(proc.returncode, stdout_file, stderr_file, data,
                                   store, signal, context)
        finally:
            if stdout_file is not None:
                stdout_file.close()

            if stderr_file is not None:
                stderr_file.close()

        return Action(data)

    @staticmethod
    def _run_as(user, group):
        """ Function wrapper that sets the user and group for the process.

        Args:
            user (int, None): The user ID the process should run as, or None to keep
                the current user.
            group (int, None): The group ID the process should run as, or None to keep
                the current group.

        Returns:
            callable: A function without arguments that applies the group and user ID.
        """
        def wrapper():
            # The group has to be set first: once the user ID has been changed to an
            # unprivileged user, the process is usually no longer allowed to change
            # its group ID.
            if group is not None:
                os.setgid(group)
            if user is not None:
                os.setuid(user)

        return wrapper
class PvTriggerTask(BaseTask):
    """ Triggers the execution of a callback function upon a change in a monitored PV.

    This trigger task monitors a PV for changes. If a change occurs a provided callback
    function is executed.
    """
    def __init__(self,
                 name,
                 pv_name,
                 callback,
                 event_trigger_time=None,
                 stop_polling_rate=2,
                 skip_initial_callback=True,
                 *,
                 queue=JobType.Task,
                 callback_init=None,
                 callback_finally=None,
                 force_run=False,
                 propagate_skip=True):
        """ Initialize the PV trigger task.

        All task parameters except the name, callback, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            pv_name (str, callable): The name of the PV that should be monitored.
            callback (callable): A callable object that is called when the PV changes.
                                 The function definition is
                                 def callback(data, store, signal, context, **event)
                                 where event is the information returned by PyEPICS for
                                 a monitor callback event, passed as keyword arguments.
            event_trigger_time (float, None): The waiting time between events in seconds.
                                              Set to None to turn off.
            stop_polling_rate (float): The number of events after which a signal is sent
                                       to the workflow to check whether the task
                                       should be stopped.
            skip_initial_callback (bool): Set to True to skip executing the callback
                                          upon initialization of the PV monitoring.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name,
                         queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        # set the task's parameters
        self.params = TaskParameters(
            pv_name=pv_name,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
            skip_initial_callback=skip_initial_callback)
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the PvTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Returns:
            Action: An Action object wrapping the unchanged task data.
        """
        params = self.params.eval(data, store)

        # if the initial event should not be skipped, act as if it already had been
        skipped_initial = not params.skip_initial_callback
        polling_event_number = 0
        queue = deque()

        # set up the internal callback, which only stores the events in the queue
        pv = PV(params.pv_name,
                callback=partial(self._pv_callback, queue=queue))

        # make sure the monitor callback is removed even if the user callback raises
        try:
            while True:
                if params.event_trigger_time is not None:
                    time.sleep(params.event_trigger_time)

                # check the stop signal every stop_polling_rate events
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # get all the events from the queue and call the callback function.
                # Events are appended on the right, so they are taken from the left
                # in order to handle them in the order in which they occurred.
                while queue:
                    event = queue.popleft()
                    if skipped_initial:
                        if self._callback is not None:
                            self._callback(data, store, signal, context, **event)
                    else:
                        skipped_initial = True
        finally:
            pv.clear_callbacks()

        return Action(data)

    @staticmethod
    def _pv_callback(queue, **kwargs):
        """ Internal callback method for the PV monitoring.

        Stores the keyword arguments of the monitor event in the queue, from where
        they are picked up by the run method.
        """
        queue.append(kwargs)
Пример #5
0
class WalkTask(BaseTask):
    """ Iterates over the files of a directory and hands each one to a callable. """
    def __init__(self,
                 name,
                 path,
                 callback,
                 recursive=False,
                 *,
                 queue=JobType.Task,
                 callback_init=None,
                 callback_finally=None,
                 force_run=False,
                 propagate_skip=True):
        """ Set up the walk task.

        Apart from name, callback, queue, force_run and propagate_skip, every task
        parameter may be given either as its native type or as a callable that
        returns the native type.

        Args:
            name (str): The name of the task.
            path (str, callable): The directory that should be walked. It has to be
                                  an absolute path, otherwise running the task
                                  raises an exception.
            callback (callable): Called once for each file found in the directory.
                                 The expected definition is
                                   def callback(entry, data, store, signal, context)
                                 with entry being of type os.DirEntry.
            recursive (bool): Also look for files in the subdirectories of path.
            queue (str): Name of the queue the task should be scheduled to. Defaults
                         to the general task queue.
            callback_init (callable): Called shortly before the task is run.
                                      The expected definition is
                                        def (data, store, signal, context)
                                      with data being the task data, store the
                                      workflow data store, signal the task signal
                                      and context the task context.
            callback_finally (callable): Always called at the end of the task, no
                                         matter whether it completed successfully,
                                         was stopped or was aborted.
                                         The expected definition is
                                           def (status, data, store, signal, context)
                                         with status telling how the task ended
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data being the task data, store the
                                         workflow data store, signal the task signal
                                         and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init, callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(path=path, recursive=recursive)
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ Walk the directory and call the callback for every file that is found.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object wrapping the unchanged data that is handed on
                    to the next task.
        """
        params = self.params.eval(data, store)
        root = params.path

        if not isabs(root):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        for entry in self._scantree(root, params.recursive):
            # without a callback the directory is still walked, but nothing is called
            if self._callback is None:
                continue
            self._callback(entry, data, store, signal, context)

        return Action(data)

    def _scantree(self, path, recursive=True):
        """ Yield a DirEntry object for every non-directory entry found in path.

        Directories are never yielded themselves (symbolic links are not followed
        when checking for a directory). They are descended into if recursive is True.
        """
        for entry in scandir(path):
            if not entry.is_dir(follow_symlinks=False):
                yield entry
            elif recursive:
                yield from self._scantree(entry.path)
Пример #6
0
class MoveTask(BaseTask):
    """ Moves one or more files or folders to a destination. """
    def __init__(self,
                 name,
                 sources,
                 destination,
                 *,
                 queue=JobType.Task,
                 callback_init=None,
                 callback_finally=None,
                 force_run=False,
                 propagate_skip=True):
        """ Set up the Move task.

        Apart from name, queue, force_run and propagate_skip, every task parameter
        may be given either as its native type or as a callable that returns the
        native type.

        Args:
            name (str): The name of the task.
            sources (str/list/callable): The file or directory paths that should be
                                         moved. Either a single path as a string, a
                                         list of path strings, or a callable returning
                                         one of the two. All paths have to be
                                         absolute, otherwise running the task raises
                                         an exception.
            destination: The file or folder the sources should be moved to, either as
                         a string or as a callable returning a string.
            queue (str): Name of the queue the task should be scheduled to. Defaults
                         to the general task queue.
            callback_init (callable): Called shortly before the task is run.
                                      The expected definition is
                                        def (data, store, signal, context)
                                      with data being the task data, store the
                                      workflow data store, signal the task signal
                                      and context the task context.
            callback_finally (callable): Always called at the end of the task, no
                                         matter whether it completed successfully,
                                         was stopped or was aborted.
                                         The expected definition is
                                           def (status, data, store, signal, context)
                                         with status telling how the task ended
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data being the task data, store the
                                         workflow data store, signal the task signal
                                         and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init, callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(sources=sources, destination=destination)

    def run(self, data, store, signal, context, **kwargs):
        """ Move every source to the destination, one after the other.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If a source or the destination is not an
                                          absolute path, or if a source is a
                                          directory but the destination is not.
            LightflowFilesystemMoveError: If the move process failed.

        Returns:
            Action: An Action object wrapping the unchanged data that is handed on
                    to the next task.
        """
        params = self.params.eval(data, store)
        destination = params.destination

        # a single path string is treated like a list with one element
        if isinstance(params.sources, str):
            sources = [params.sources]
        else:
            sources = params.sources

        for source in sources:
            logger.info('Move {} to {}'.format(source, destination))

            if not os.path.isabs(source):
                raise LightflowFilesystemPathError(
                    'The source path is not an absolute path')

            if not os.path.isabs(destination):
                raise LightflowFilesystemPathError(
                    'The destination path is not an absolute path')

            # a directory can only be moved into an existing directory
            if os.path.isdir(source) and not os.path.isdir(destination):
                raise LightflowFilesystemPathError(
                    'The destination is not a valid directory')

            try:
                shutil.move(source, destination)
            except OSError as error:
                raise LightflowFilesystemMoveError(error)

        return Action(data)
class NotifyTriggerTask(BaseTask):
    """ Triggers a callback function upon file changes in a directory.

    This trigger task watches a specified directory for file changes. After having
    aggregated a given number of file changes it calls the provided callback function
    with a list of the files that were changed.
    """
    def __init__(self, name, path, callback,
                 recursive=True, aggregate=None, skip_duplicate=False,
                 use_existing=False, flush_existing=True, exclude_mask=None,
                 on_file_create=False, on_file_close=True,
                 on_file_delete=False, on_file_move=False,
                 event_trigger_time=None, stop_polling_rate=2, *,
                 callback_init=None, callback_finally=None,
                 queue=JobType.Task, force_run=False, propagate_skip=True):
        """ Initialize the filesystem notify trigger task.

        All task parameters except the name, callback, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            path: The path to the directory that should be watched for filesystem changes.
                  The path has to be an absolute path, otherwise an exception is thrown.
            callback (callable): A callable object that is called with the list of files
                                 that have changed. The function definition is
                                 def callback(files, data, store, signal, context).
            recursive (bool): Set to True to watch for file system changes in
                              subdirectories of the specified path. Keeps track of
                              the creation and deletion of subdirectories.
            aggregate (int, None): The number of events that are aggregated before the
                                   callback function is called. Set to None or 1 to
                                   trigger on each file event occurrence. Values
                                   smaller than 1 are treated like 1.
            skip_duplicate (bool): Skip duplicated file names. Duplicated entries can
                                   occur if the same file is modified before the list
                                   of files is handed to the callback. Another case
                                   is if the parameter 'use_existing' is activated and
                                   an existing file is modified before the aggregated
                                   files are sent to the callback function.
            use_existing (bool): Use the existing files that are located in path for
                                 initializing the file list.
            flush_existing (bool): If 'use_existing' is True, then flush all existing
                                   files without regard to the aggregation setting.
                                   I.e., all existing files are sent to the callback.
            exclude_mask (str): Specifies a regular expression that can be used to exclude
                                files. For example if a detector creates temporary files
                                that should not be sent to the callback function.
            on_file_create (bool): Set to True to listen for file creation events.
            on_file_close (bool): Set to True to listen for file closing events.
            on_file_delete (bool): Set to True to listen for file deletion events.
            on_file_move (bool):  Set to True to listen for file move events.
            event_trigger_time (float, None): The waiting time between events in seconds.
                                              Set to None to turn off.
            stop_polling_rate (float): The number of events after which a signal is sent
                                       to the workflow to check whether the task
                                       should be stopped.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init, callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        # set the tasks's parameters
        self.params = TaskParameters(
            path=path,
            recursive=recursive,
            aggregate=aggregate if aggregate is not None else 1,
            skip_duplicate=skip_duplicate,
            use_existing=use_existing,
            flush_existing=flush_existing,
            exclude_mask=exclude_mask,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
            on_file_create=on_file_create,
            on_file_close=on_file_close,
            on_file_delete=on_file_delete,
            on_file_move=on_file_move
        )
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the NotifyTriggerTask task.

        Watches the directory until the stop signal is detected and calls the
        callback with chunks of 'aggregate' changed files.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object containing the data that should be passed on
                    to the next task.
        """
        params = self.params.eval(data, store)

        # The constructor only replaces a literal None. A callable 'aggregate' can
        # still evaluate to None, and values below 1 would either divide by zero or
        # never trigger the callback, so fall back to triggering on every event.
        aggregate = params.aggregate
        if aggregate is None or aggregate < 1:
            aggregate = 1

        # build notification mask
        on_file_create = constants.IN_CREATE if params.on_file_create else 0x00000000
        on_file_close = constants.IN_CLOSE_WRITE if params.on_file_close else 0x00000000
        on_file_delete = constants.IN_DELETE if params.on_file_delete else 0x00000000
        on_file_move = constants.IN_MOVE if params.on_file_move else 0x00000000
        mask = (on_file_create | on_file_close | on_file_delete | on_file_move)

        if not os.path.isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        if params.recursive:
            notify = adapters.InotifyTree(params.path.encode('utf-8'))
        else:
            notify = adapters.Inotify()
            notify.add_watch(params.path.encode('utf-8'))

        # setup regex
        if isinstance(params.exclude_mask, str):
            regex = re.compile(params.exclude_mask)
        else:
            regex = None

        # if requested, pre-fill the file list with existing files
        files = []
        if params.use_existing:
            for (dir_path, dir_names, filenames) in os.walk(params.path):
                files.extend([os.path.join(dir_path, filename) for filename in filenames])
                # os.walk yields the top directory first, so stop after it
                if not params.recursive:
                    break

            if regex is not None:
                files = [file for file in files if regex.search(file) is None]

            if params.flush_existing and len(files) > 0:
                if self._callback is not None:
                    self._callback(files, data, store, signal, context)
                # start over with a fresh list instead of clearing in place, so that
                # a list reference kept by the callback is not emptied afterwards
                files = []

        polling_event_number = 0
        try:
            for event in notify.event_gen():
                if params.event_trigger_time is not None:
                    time.sleep(params.event_trigger_time)

                # check every stop_polling_rate events the stop signal
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # in case of an event check whether it matches the mask and
                # remember the file it refers to
                if event is not None:
                    (header, _, watch_path, filename) = event

                    # only events for files (not directories) that match the mask
                    if not (header.mask & constants.IN_ISDIR) and (header.mask & mask):
                        new_file = os.path.join(watch_path.decode('utf-8'),
                                                filename.decode('utf-8'))

                        add_file = not params.skip_duplicate or new_file not in files

                        if add_file and regex is not None:
                            add_file = regex.search(new_file) is None

                        if add_file:
                            files.append(new_file)

                # as soon as enough files have been aggregated call the callback,
                # once for every complete chunk; the callback receives a copy
                while len(files) >= aggregate:
                    if self._callback is not None:
                        self._callback(files[0:aggregate], data,
                                       store, signal, context)
                    del files[0:aggregate]

        finally:
            # the watch was only added explicitly in the non-recursive case
            if not params.recursive:
                notify.remove_watch(params.path.encode('utf-8'))

        return Action(data)
Пример #8
0
class MakeDirTask(BaseTask):
    """ Creates one or more new directories if they do not exist yet. """
    def __init__(self, name, paths, *, queue=JobType.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the MakeDir task.

        All task parameters except the name, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A path, or list of paths representing the
                                       directories that should be created. The paths have
                                       to be absolute paths, otherwise an exception is
                                       thrown. This parameter can either be a string,
                                       a list of strings or a callable that returns a
                                       string or a list of strings.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         data the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init, callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(paths=paths)

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the MakeDir task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If a specified directory is not an
                                          absolute path.
            LightflowFilesystemMkdirError: If the creation of a directory failed.

        Returns:
            Action: An Action object containing the data that should be passed on
                    to the next task and optionally a list of successor tasks that
                    should be executed.
        """
        params = self.params.eval(data, store)
        paths = [params.paths] if isinstance(params.paths, str) else params.paths

        for path in paths:
            if not os.path.isabs(path):
                raise LightflowFilesystemPathError(
                    'The specified path is not an absolute path')

            if os.path.exists(path):
                logger.info('Directory {} already exists. Skip creation.'.format(path))
                continue

            try:
                # exist_ok avoids a failure if the directory is created by someone
                # else between the existence check above and this call
                os.makedirs(path, exist_ok=True)
            except OSError as e:
                raise LightflowFilesystemMkdirError(e)

        return Action(data)
Пример #9
0
class ChmodTask(BaseTask):
    """ Sets the POSIX permissions of files and directories. """
    def __init__(self, name, paths, permission, recursive=True, only_dirs=False, *,
                 queue=JobType.Task, callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Set up a task that changes the POSIX permissions of files and directories.

        Apart from name, queue, force_run and propagate_skip, every task parameter
        may be passed either as its native type or as a callable that returns the
        native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A single path or a list of paths pointing to the
                                       files or directories whose permissions should
                                       be changed. Paths are expected to be absolute;
                                       a non-absolute directory path raises an
                                       exception when the task runs. Accepts a
                                       string, a list of strings or a callable
                                       returning either of them.
            permission: The POSIX permission given as an octal string (e.g. '755'),
                        or a callable returning such a string.
            recursive: If True, the permissions of all sub-directories and files below
                       a directory path are changed as well. Either a Boolean value
                       or a callable returning a Boolean value.
            only_dirs: If True, only directories receive the permission and files are
                       left untouched. Either a Boolean value or a callable returning
                       a Boolean value.
            queue (str): Name of the queue the task should be scheduled to. Defaults to
                         the general task queue.
            callback_init (callable): A callable that is called shortly before the task
                                      is run. The definition is:
                                        def (data, store, signal, context)
                                      where data is the task data, store the workflow
                                      data store, signal the task signal and
                                      context the task context.
            callback_finally (callable): A callable that is always called at the end of
                                         a task, regardless of whether it completed
                                         successfully, was stopped or was aborted.
                                         The definition is:
                                           def (status, data, store, signal, context)
                                         where status specifies whether the task was
                                           success: TaskStatus.Success
                                           stopped: TaskStatus.Stopped
                                           aborted: TaskStatus.Aborted
                                           raised exception: TaskStatus.Error
                                         and data is the task data, store the workflow
                                         data store, signal the task signal and
                                         context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        # options that are handled by the base task and simply passed through
        base_options = dict(queue=queue,
                            callback_init=callback_init,
                            callback_finally=callback_finally,
                            force_run=force_run,
                            propagate_skip=propagate_skip)
        super().__init__(name, **base_options)

        # the task specific parameters are evaluated when the task is run
        self.params = TaskParameters(
            paths=paths, permission=permission,
            recursive=recursive, only_dirs=only_dirs)

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the ChmodTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.
            LightflowFilesystemChmodError: If an error occurred while the ownership is set

        Returns:
            Action: An Action object containing the data that should be passed on
                    to the next task and optionally a list of successor tasks that
                    should be executed.
        """
        params = self.params.eval(data, store)
        path_perm = int(params.permission, 8)
        paths = [params.paths] if isinstance(params.paths,
                                             str) else params.paths

        for path in paths:
            if os.path.isdir(path):
                if not os.path.isabs(path):
                    raise LightflowFilesystemPathError(
                        'The specified path is not an absolute path')

                try:
                    # set the permission for the root directory
                    os.chmod(path, path_perm)

                    # get the files and sub-directories
                    if params.recursive:
                        dir_tree = os.walk(path, topdown=False)
                    else:
                        dir_tree = [(path, [], [
                            f for f in os.listdir(path)
                            if os.path.isfile(os.path.join(path, f))
                        ])]

                    # iterate over the directory tree and set the POSIX permissions
                    for root, dirs, files in dir_tree:
                        if not params.only_dirs:
                            for name in files:
                                os.chmod(os.path.join(root, name), path_perm)

                        for name in dirs:
                            os.chmod(os.path.join(root, name), path_perm)
                except (OSError, FileNotFoundError) as e:
                    LightflowFilesystemChmodError(e)
            else:
                try:
                    os.chmod(path, path_perm)
                except (OSError, FileNotFoundError) as e:
                    LightflowFilesystemChmodError(e)