class NewLineTriggerTask(BaseTask):
    """ Triggers a callback function upon a new line added to a file.

    This trigger task watches a specified file for new lines. After having
    aggregated a given number of line changes it calls the provided callback
    function with a list of lines that were added.
    """
    def __init__(self, name, path, callback, aggregate=None, use_existing=False,
                 flush_existing=True, event_trigger_time=0.5, stop_polling_rate=2, *,
                 callback_init=None, callback_finally=None,
                 queue=JobType.Task, force_run=False, propagate_skip=True):
        """ Initialize the filesystem notify trigger task.

        All task parameters except the name, callback, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            path: The path to the file that should be watched for new lines.
                The path has to be an absolute path, otherwise an exception
                is thrown.
            callback (callable): A callable object that is called with the list
                of lines that have changed. The function definition is
                def callback(lines, data, store, signal, context).
            aggregate (int, None): The number of lines that are aggregated before
                the callback is called. Set to None or 1 to trigger on each new
                line event occurrence.
            use_existing (bool): Use the existing lines that are located in file
                for initialising the line list.
            flush_existing (bool): If 'use_existing' is True, then flush all
                existing lines without regard to the aggregation setting. I.e.,
                all existing lines are sent to the callback.
            event_trigger_time (float, None): The waiting time between events in
                seconds. Set to None to turn off.
            stop_polling_rate (float): The number of events after which a signal
                is sent to the workflow to check whether the task should
                be stopped.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before
                the task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        # set the task's parameters; an aggregate of None is normalized to 1
        # so the run loop can always do integer chunking
        self.params = TaskParameters(
            path=path,
            aggregate=aggregate if aggregate is not None else 1,
            use_existing=use_existing,
            flush_existing=flush_existing,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
        )
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the NewLineTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.
        """
        params = self.params.eval(data, store)

        if not os.path.isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # if requested, pre-fill the line list with existing lines
        lines = []
        num_read_lines = 0
        if params.use_existing:
            with open(params.path, 'r') as file:
                lines = file.readlines()
            num_read_lines = len(lines)
            if params.flush_existing and num_read_lines > 0:
                if self._callback is not None:
                    self._callback(lines, data, store, signal, context)
                del lines[:]

        polling_event_number = 0

        def watch_file(file_pointer, task_signal):
            # generator yielding each new line appended to the file; stops
            # when the task receives a stop signal
            while True:
                if task_signal.is_stopped:
                    break
                new = file_pointer.readline()
                if new:
                    yield new
                else:
                    # no new data yet -- wait before polling again. A trigger
                    # time of None means "no waiting" (documented behavior),
                    # so guard the sleep call instead of crashing on None.
                    if params.event_trigger_time is not None:
                        time.sleep(params.event_trigger_time)

        with open(params.path, 'r') as file:
            if params.use_existing:
                # skip the lines that were already consumed above
                for _ in range(num_read_lines):
                    file.readline()
            else:
                # start watching from the current end of the file
                file.seek(0, 2)

            for line in watch_file(file, signal):
                lines.append(line)

                # check every stop_polling_rate events the stop signal
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # as soon as enough lines have been aggregated call the
                # callback function, one chunk of 'aggregate' lines at a time
                if len(lines) >= params.aggregate:
                    chunks = len(lines) // params.aggregate
                    for _ in range(chunks):
                        if self._callback is not None:
                            self._callback(lines[0:params.aggregate],
                                           data, store, signal, context)
                        del lines[0:params.aggregate]

        return Action(data)
class GlobTask(BaseTask):
    """ Returns list of files from path using glob. """
    def __init__(self, name, paths, callback, pattern='*', recursive=False,
                 return_abs=True, *, queue=JobType.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the glob task object.

        All task parameters except the name, callback, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A path, or list of paths, to look in for
                files. The paths have to be absolute paths, otherwise an
                exception is thrown. This parameter can either be a string, a
                list of strings or a callable that returns a string or a list
                of strings.
            callback (callable): A callable object that is called with the
                result of the glob operation. The function definition is
                def callback(files, data, store, signal, context).
            pattern (str): The glob style pattern to match when returning files.
            recursive (bool): Recursively look for files. Use ** to match any
                files and zero or more directories and subdirectories. May slow
                things down if lots of files.
            return_abs (bool): If True return absolute paths, if False return
                filename only.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before
                the task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        self.params = TaskParameters(paths=paths,
                                     pattern=pattern,
                                     recursive=recursive,
                                     return_abs=return_abs)
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the glob task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task and optionally a list of successor tasks
                that should be executed.
        """
        params = self.params.eval(data, store)

        # normalize a single path string into a one-element list
        if isinstance(params.paths, str):
            search_paths = [params.paths]
        else:
            search_paths = params.paths

        if any(not isabs(search_path) for search_path in search_paths):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # collect every match, optionally stripped down to the bare filename
        matches = []
        for search_path in search_paths:
            for hit in glob(pjoin(search_path, params.pattern),
                            recursive=params.recursive):
                matches.append(hit if params.return_abs else basename(hit))

        if self._callback is not None:
            self._callback(matches, data, store, signal, context)

        return Action(data)
class BashTask(BaseTask):
    """ The Bash task executes a user-defined bash command or bash file.

    All task parameters except the name, callbacks, queue, force_run and
    propagate_skip can either be their native type or a callable returning
    the native type.

    Args:
        name (str): The name of the task.
        command (function, str): The command or bash file that should be executed.
        cwd (function, str, None): The working directory for the command.
        env (function, dict, None): A dictionary of environment variables.
        user (function, int, None): The user ID of the user with which the
            command should be executed.
        group (function, int, None): The group ID of the group with which the
            command should be executed.
        stdin (function, str, None): An input string that should be passed on
            to the process.
        refresh_time (function, float): The time in seconds the internal output
            handling waits before checking for new output from the process.
        capture_stdout (function, bool): Set to ``True`` to capture all standard
            output in a temporary file.
        capture_stderr (function, bool): Set to ``True`` to capture all standard
            errors in a temporary file.
        callback_process (callable): A callable that is called after the process
            started. The definition is::

                (pid, data, store, signal, context) -> None

            with the parameters:

                - **pid** (*int*): The process PID.
                - **data** (:class:`.MultiTaskData`): The data object that has
                  been passed from the predecessor task.
                - **store** (:class:`.DataStoreDocument`): The persistent data
                  store object that allows the task to store data for access
                  across the current workflow run.
                - **signal** (*TaskSignal*): The signal object for tasks. It
                  wraps the construction and sending of signals into easy to
                  use methods.
                - **context** (*TaskContext*): The context in which the tasks
                  runs.
        callback_end (callable): A callable that is called after the process
            completed. The definition is::

                (returncode, stdout_file, stderr_file,
                 data, store, signal, context) -> None

            with the parameters:

                - **returncode** (*int*): The return code of the process.
                - **stdout_file**: A file object with the standard output if the
                  flag ``capture_stdout`` was set to ``True``,
                  otherwise ``None``.
                - **stderr_file**: A file object with the error output if the
                  flag ``capture_stderr`` was set to ``True``,
                  otherwise ``None``.
                - **data**, **store**, **signal**, **context**: As described for
                  ``callback_process``.
        callback_stdout (callable): A callable that is called for every line of
            output the process sends to stdout. The definition is::

                (line, data, store, signal, context) -> None

            with the parameters:

                - **line** (*str*): Single line of the process output as a
                  string.
                - **data**, **store**, **signal**, **context**: As described for
                  ``callback_process``.
        callback_stderr (callable): A callable that is called for every line of
            output the process sends to stderr. The definition is::

                (line, data, store, signal, context) -> None

            with the parameters:

                - **line** (*str*): Single line of the process output as a
                  string.
                - **data**, **store**, **signal**, **context**: As described for
                  ``callback_process``.
        queue (str): Name of the queue the task should be scheduled to. Defaults
            to the general task queue.
        callback_init (callable): An optional callable that is called shortly
            before the task is run. The definition is::

                (data, store, signal, context) -> None

            with the parameters:

                - **data**, **store**, **signal**, **context**: As described for
                  ``callback_process``.
        callback_finally (callable): An optional callable that is always called
            at the end of a task, regardless whether it completed successfully,
            was stopped or was aborted. The definition is::

                (status, data, store, signal, context) -> None

            with the parameters:

                - **status** (*TaskStatus*): The current status of the task. It
                  can be one of the following:

                      - ``TaskStatus.Success`` -- task was successful
                      - ``TaskStatus.Stopped`` -- task was stopped
                      - ``TaskStatus.Aborted`` -- task was aborted
                      - ``TaskStatus.Error`` -- task raised an exception

                - **data**, **store**, **signal**, **context**: As described for
                  ``callback_process``.
        force_run (bool): Run the task even if it is flagged to be skipped.
        propagate_skip (bool): Propagate the skip flag to the next task.
    """
    def __init__(self, name, command, cwd=None, env=None, user=None, group=None,
                 stdin=None, refresh_time=0.1,
                 capture_stdout=False, capture_stderr=False,
                 callback_process=None, callback_end=None,
                 callback_stdout=None, callback_stderr=None, *,
                 queue=DefaultJobQueueName.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(command=command, cwd=cwd, env=env,
                                     user=user, group=group, stdin=stdin,
                                     refresh_time=refresh_time,
                                     capture_stdout=capture_stdout,
                                     capture_stderr=capture_stderr)
        self._callback_process = callback_process
        self._callback_end = callback_end
        self._callback_stdout = callback_stdout
        self._callback_stderr = callback_stderr

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the Bash task.

        Args:
            data (:class:`.MultiTaskData`): The data object that has been passed
                from the predecessor task.
            store (:class:`.DataStoreDocument`): The persistent data store
                object that allows the task to store data for access across the
                current workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Returns:
            Action (Action): An Action object containing the data that should
                be passed on to the next task and optionally a list of successor
                tasks that should be executed.
        """
        # 'command' is excluded here so it can be evaluated lazily right before
        # the process is launched
        params = self.params.eval(data, store, exclude=['command'])

        # a pipe is needed whenever output is captured OR a line callback is
        # registered; the temporary files are only created for captured output
        capture_stdout = self._callback_stdout is not None or params.capture_stdout
        capture_stderr = self._callback_stderr is not None or params.capture_stderr

        stdout_file = TemporaryFile() if params.capture_stdout else None
        stderr_file = TemporaryFile() if params.capture_stderr else None

        stdout = PIPE if capture_stdout else None
        stderr = PIPE if capture_stderr else None

        # change the user or group under which the process should run
        if params.user is not None or params.group is not None:
            pre_exec = self._run_as(params.user, params.group)
        else:
            pre_exec = None

        # call the command
        proc = Popen(self.params.eval_single('command', data, store),
                     cwd=params.cwd, shell=True, env=params.env,
                     preexec_fn=pre_exec, stdout=stdout, stderr=stderr,
                     stdin=PIPE if params.stdin is not None else None)

        # if input is available, send it to the process and close the stream so
        # the process sees EOF -- otherwise commands that read stdin until EOF
        # would block forever
        if params.stdin is not None:
            proc.stdin.write(params.stdin.encode(sys.getfilesystemencoding()))
            proc.stdin.close()

        # send a notification that the process has been started
        try:
            if self._callback_process is not None:
                self._callback_process(proc.pid, data, store, signal, context)
        except (StopTask, AbortWorkflow):
            proc.terminate()
            raise

        # send the output handling to a thread
        if capture_stdout or capture_stderr:
            output_reader = BashTaskOutputReader(proc, stdout_file, stderr_file,
                                                 self._callback_stdout,
                                                 self._callback_stderr,
                                                 params.refresh_time,
                                                 data, store, signal, context)
            output_reader.start()
        else:
            output_reader = None

        # wait for the process to complete and watch for a stop signal
        while proc.poll() is None or\
                (output_reader is not None and output_reader.is_alive()):
            sleep(params.refresh_time)
            if signal.is_stopped:
                proc.terminate()

        if output_reader is not None:
            output_reader.join()
            data = output_reader.data

            # if a stop or abort exception was raised, stop the bash process
            # and re-raise
            if output_reader.exc_obj is not None:
                if proc.poll() is None:
                    proc.terminate()
                raise output_reader.exc_obj

        # send a notification that the process has completed
        if self._callback_end is not None:
            if stdout_file is not None:
                stdout_file.seek(0)
            if stderr_file is not None:
                stderr_file.seek(0)

            self._callback_end(proc.returncode, stdout_file, stderr_file,
                               data, store, signal, context)

        if stdout_file is not None:
            stdout_file.close()

        if stderr_file is not None:
            stderr_file.close()

        return Action(data)

    @staticmethod
    def _run_as(user, group):
        """ Function wrapper that sets the user and group for the process.

        The group is changed before the user: once setuid() has dropped root
        privileges, a subsequent setgid() call would fail with EPERM.
        """
        def wrapper():
            if group is not None:
                os.setgid(group)
            if user is not None:
                os.setuid(user)
        return wrapper
class PvTriggerTask(BaseTask):
    """ Triggers the execution of a callback function upon a change in a
    monitored PV.

    This trigger task monitors a PV for changes. If a change occurs a provided
    callback function is executed.
    """
    def __init__(self, name, pv_name, callback, event_trigger_time=None,
                 stop_polling_rate=2, skip_initial_callback=True, *,
                 queue=JobType.Task, callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the filesystem notify trigger task.

        All task parameters except the name, callback, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            pv_name (str, callable): The name of the PV that should be
                monitored.
            callback (callable): A callable object that is called when the PV
                changes. The function definition is
                def callback(data, store, signal, context, event)
                where event is the information returned by PyEPICS for a
                monitor callback event.
            event_trigger_time (float, None): The waiting time between events in
                seconds. Set to None to turn off.
            stop_polling_rate (float): The number of events after which a signal
                is sent to the workflow to check whether the task should
                be stopped.
            skip_initial_callback (bool): Set to True to skip executing the
                callback upon initialization of the PV monitoring.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before
                the task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        # set the task's parameters
        self.params = TaskParameters(
            pv_name=pv_name,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
            skip_initial_callback=skip_initial_callback)
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the PvTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.
        """
        params = self.params.eval(data, store)

        skipped_initial = False if params.skip_initial_callback else True
        polling_event_number = 0
        queue = deque()

        # set up the internal callback
        pv = PV(params.pv_name, callback=partial(self._pv_callback, queue=queue))

        while True:
            if params.event_trigger_time is not None:
                time.sleep(params.event_trigger_time)

            # check every stop_polling_rate events the stop signal
            polling_event_number += 1
            if polling_event_number > params.stop_polling_rate:
                polling_event_number = 0
                if signal.is_stopped:
                    break

            # drain the queued events in arrival order (FIFO). Events are
            # appended on the right, so popleft() is required here -- pop()
            # would deliver events newest-first and make skip_initial_callback
            # skip the wrong (latest) event instead of the true initial one.
            while len(queue) > 0:
                event = queue.popleft()
                if skipped_initial:
                    if self._callback is not None:
                        # pyepics passes the event info as keyword arguments,
                        # which are forwarded to the callback unchanged
                        self._callback(data, store, signal, context, **event)
                else:
                    skipped_initial = True

        pv.clear_callbacks()
        return Action(data)

    @staticmethod
    def _pv_callback(queue, **kwargs):
        """ Internal callback method for the PV monitoring. """
        queue.append(kwargs)
class WalkTask(BaseTask):
    """ Walks (recursively) down a directory and calls a callable for each
    file. """
    def __init__(self, name, path, callback, recursive=False, *,
                 queue=JobType.Task, callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the walk task object.

        All task parameters except the name, callback, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            path (str, callable): The path to the directory that should be
                walked. The path has to be an absolute path, otherwise an
                exception is thrown.
            callback (callable): A callable object that is called for each file
                in the directory given by path. The function definition is
                def callback(entry, data, store, signal, context)
                where entry is of type os.DirEntry.
            recursive (bool): Recursively look for files in the directory.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before
                the task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        self.params = TaskParameters(path=path, recursive=recursive)
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the walk task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task and optionally a list of successor tasks
                that should be executed.
        """
        params = self.params.eval(data, store)

        if not isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # hand every discovered file entry to the user callback, if one is set
        callback = self._callback
        for file_entry in self._scantree(params.path, params.recursive):
            if callback is not None:
                callback(file_entry, data, store, signal, context)

        return Action(data)

    def _scantree(self, path, recursive=True):
        """ (recursively) yield DirEntry objects for directory given by the
        path."""
        for item in scandir(path):
            # real directories (symlinks excluded) are descended into when
            # recursion is enabled; everything else is yielded directly
            if not item.is_dir(follow_symlinks=False):
                yield item
            elif recursive:
                yield from self._scantree(item.path)
class MoveTask(BaseTask):
    """ Moves a list of files or folders from a source to a destination. """
    def __init__(self, name, sources, destination, *, queue=JobType.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the Move task.

        All task parameters except the name, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            sources (str/list/callable): A single file or directory path or a
                list of file or directory paths that should be moved. This
                parameter can either be a string, a list of strings or a
                callable that returns a string or a list of strings. The paths
                have to be absolute paths, otherwise an exception is thrown.
            destination: The destination file or folder the source should be
                moved to. This parameter can either be a string or a callable
                returning a string.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before
                the task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run,
                         propagate_skip=propagate_skip)

        self.params = TaskParameters(sources=sources, destination=destination)

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the MoveTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the source is a directory
                but the target is not.
            LightflowFilesystemMoveError: If the move process failed.

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task and optionally a list of successor tasks
                that should be executed.
        """
        params = self.params.eval(data, store)
        destination = params.destination

        # normalize a single source string into a one-element list
        if isinstance(params.sources, str):
            source_list = [params.sources]
        else:
            source_list = params.sources

        for src in source_list:
            logger.info('Move {} to {}'.format(src, destination))

            # both ends of the move must be given as absolute paths
            if not os.path.isabs(src):
                raise LightflowFilesystemPathError(
                    'The source path is not an absolute path')

            if not os.path.isabs(destination):
                raise LightflowFilesystemPathError(
                    'The destination path is not an absolute path')

            # a directory can only be moved into another directory
            if os.path.isdir(src) and not os.path.isdir(destination):
                raise LightflowFilesystemPathError(
                    'The destination is not a valid directory')

            try:
                shutil.move(src, destination)
            except OSError as err:
                raise LightflowFilesystemMoveError(err)

        return Action(data)
class NotifyTriggerTask(BaseTask):
    """ Triggers a callback function upon file changes in a directory.

    This trigger task watches a specified directory for file changes. After having
    aggregated a given number of file changes it calls the provided callback
    function with a list of the files that were changed.
    """
    def __init__(self, name, path, callback, recursive=True, aggregate=None,
                 skip_duplicate=False, use_existing=False, flush_existing=True,
                 exclude_mask=None, on_file_create=False, on_file_close=True,
                 on_file_delete=False, on_file_move=False,
                 event_trigger_time=None, stop_polling_rate=2, *,
                 callback_init=None, callback_finally=None,
                 queue=JobType.Task, force_run=False, propagate_skip=True):
        """ Initialize the filesystem notify trigger task.

        All task parameters except the name, callback, queue, force_run and
        propagate_skip can either be their native type or a callable returning
        the native type.

        Args:
            name (str): The name of the task.
            path: The path to the directory that should be watched for filesystem
                changes. The path has to be an absolute path, otherwise an
                exception is thrown.
            callback (callable): A callable object that is called with the list of
                files that have changed. The function definition is
                def callback(files, data, store, signal, context).
            recursive (bool): Set to True to watch for file system changes in
                subdirectories of the specified path. Keeps track of the creation
                and deletion of subdirectories.
            aggregate (int, None): The number of events that are aggregated before
                the callback function is called. Set to None or 1 to trigger on
                each file event occurrence.
            skip_duplicate (bool): Skip duplicated file names. Duplicated entries
                can occur if the same file is modified before the list of files is
                handed to the callback. Another case is if the parameter
                'use_existing' is activated and an existing file is modified before
                the aggregated files are sent to the callback function.
            use_existing (bool): Use the existing files that are located in path
                for initializing the file list.
            flush_existing (bool): If 'use_existing' is True, then flush all
                existing files without regard to the aggregation setting. I.e.,
                all existing files are sent to the callback.
            exclude_mask (str): Specifies a regular expression that can be used to
                exclude files. For example if a detector creates temporary files
                that should not be sent to the callback function.
            on_file_create (bool): Set to True to listen for file creation events.
            on_file_close (bool): Set to True to listen for file closing events.
            on_file_delete (bool): Set to True to listen for file deletion events.
            on_file_move (bool): Set to True to listen for file move events.
            event_trigger_time (float, None): The waiting time between events in
                seconds. Set to None to turn off.
            stop_polling_rate (float): The number of events after which a signal
                is sent to the workflow to check whether the task should be
                stopped.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before the
                task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        # set the tasks's parameters
        self.params = TaskParameters(
            path=path,
            recursive=recursive,
            aggregate=aggregate if aggregate is not None else 1,
            skip_duplicate=skip_duplicate,
            use_existing=use_existing,
            flush_existing=flush_existing,
            exclude_mask=exclude_mask,
            event_trigger_time=event_trigger_time,
            stop_polling_rate=stop_polling_rate,
            on_file_create=on_file_create,
            on_file_close=on_file_close,
            on_file_delete=on_file_delete,
            on_file_move=on_file_move
        )
        self._callback = callback

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the NotifyTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task.
        """
        params = self.params.eval(data, store)

        # build notification mask: each enabled event type contributes its
        # inotify constant, all OR-ed together into a single filter mask
        on_file_create = constants.IN_CREATE if params.on_file_create else 0x00000000
        on_file_close = constants.IN_CLOSE_WRITE if params.on_file_close else 0x00000000
        on_file_delete = constants.IN_DELETE if params.on_file_delete else 0x00000000
        on_file_move = constants.IN_MOVE if params.on_file_move else 0x00000000
        mask = (on_file_create | on_file_close | on_file_delete | on_file_move)

        if not os.path.isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        # InotifyTree manages watches on subdirectories itself; the flat
        # Inotify watcher needs an explicit watch on the target directory
        if params.recursive:
            notify = adapters.InotifyTree(params.path.encode('utf-8'))
        else:
            notify = adapters.Inotify()
            notify.add_watch(params.path.encode('utf-8'))

        # setup the optional exclusion regex
        if isinstance(params.exclude_mask, str):
            regex = re.compile(params.exclude_mask)
        else:
            regex = None

        # if requested, pre-fill the file list with existing files
        files = []
        if params.use_existing:
            for (dir_path, dir_names, filenames) in os.walk(params.path):
                files.extend([os.path.join(dir_path, filename)
                              for filename in filenames])
                # non-recursive mode only looks at the top-level directory
                if not params.recursive:
                    break

            if regex is not None:
                files = [file for file in files if regex.search(file) is None]

            # flush_existing bypasses the aggregation threshold for the
            # pre-existing files and sends them all at once
            if params.flush_existing and len(files) > 0:
                if self._callback is not None:
                    self._callback(files, data, store, signal, context)
                del files[:]

        polling_event_number = 0
        try:
            for event in notify.event_gen():
                # optional throttle between processed events
                if params.event_trigger_time is not None:
                    time.sleep(params.event_trigger_time)

                # check the stop signal every stop_polling_rate events
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # in case of an event check whether it matches the mask and
                # collect the file for the callback
                if event is not None:
                    (header, type_names, watch_path, filename) = event

                    # ignore directory events; only file events matching the
                    # requested mask are collected
                    if (not header.mask & constants.IN_ISDIR) and\
                            (header.mask & mask):
                        new_file = os.path.join(watch_path.decode('utf-8'),
                                                filename.decode('utf-8'))

                        add_file = not params.skip_duplicate or \
                            (params.skip_duplicate and new_file not in files)

                        if add_file and regex is not None:
                            add_file = regex.search(new_file) is None

                        if add_file:
                            files.append(new_file)

                    # as soon as enough files have been aggregated, hand them to
                    # the callback in chunks of 'aggregate' files each
                    if len(files) >= params.aggregate:
                        chunks = len(files) // params.aggregate
                        for i in range(0, chunks):
                            if self._callback is not None:
                                self._callback(files[0:params.aggregate], data,
                                               store, signal, context)
                            del files[0:params.aggregate]
        finally:
            # only the flat watcher added an explicit watch that must be removed
            if not params.recursive:
                notify.remove_watch(params.path.encode('utf-8'))

        return Action(data)
class MakeDirTask(BaseTask):
    """ Creates one or more new directories if they do not exist yet. """
    def __init__(self, name, paths, *, queue=JobType.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the MakeDir task.

        All task parameters except the name, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A path, or list of paths representing the
                directories that should be created. The paths have to be absolute
                paths, otherwise an exception is thrown. This parameter can either
                be a string, a list of strings or a callable that returns a string
                or a list of strings.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before the
                task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(paths=paths)

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the MakeDir task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified directories are not
                absolute paths.
            LightflowFilesystemMkdirError: If the directory creation failed.

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task and optionally a list of successor tasks
                that should be executed.
        """
        params = self.params.eval(data, store)
        paths = [params.paths] if isinstance(params.paths, str) else params.paths

        for path in paths:
            if not os.path.isabs(path):
                raise LightflowFilesystemPathError(
                    'The specified path is not an absolute path')

            if not os.path.exists(path):
                try:
                    # exist_ok=True closes the race window between the exists()
                    # check above and the creation: a directory created
                    # concurrently no longer aborts the task with FileExistsError
                    os.makedirs(path, exist_ok=True)
                except OSError as e:
                    raise LightflowFilesystemMkdirError(e)
            else:
                logger.info('Directory {} already exists. Skip creation.'.format(path))

        return Action(data)
class ChmodTask(BaseTask):
    """ Sets the POSIX permissions of files and directories. """
    def __init__(self, name, paths, permission, recursive=True,
                 only_dirs=False, *, queue=JobType.Task,
                 callback_init=None, callback_finally=None,
                 force_run=False, propagate_skip=True):
        """ Initialize the change permission task.

        All task parameters except the name, queue, force_run and propagate_skip
        can either be their native type or a callable returning the native type.

        Args:
            name (str): The name of the task.
            paths (str/list/callable): A path, or list of paths representing the
                files or directories for which the permissions should be changed.
                The paths have to be absolute paths, otherwise an exception is
                thrown. This parameter can either be a string, a list of strings
                or a callable that returns a string or a list of strings.
            permission: The POSIX permission as a string (e.g. '755'). This
                parameter can either be a string or a callable returning a string.
            recursive: Set to True to recursively change subfolders and files if
                a path is pointing to a directory. This parameter can either be a
                Boolean value or a callable returning a Boolean value.
            only_dirs: Set to True to only set the permission for directories and
                not for files. This parameter can either be a Boolean value or a
                callable returning a Boolean value.
            queue (str): Name of the queue the task should be scheduled to.
                Defaults to the general task queue.
            callback_init (callable): A callable that is called shortly before the
                task is run. The definition is:
                    def (data, store, signal, context)
                where data the task data, store the workflow data store, signal
                the task signal and context the task context.
            callback_finally (callable): A callable that is always called at the
                end of a task, regardless whether it completed successfully, was
                stopped or was aborted. The definition is:
                    def (status, data, store, signal, context)
                where status specifies whether the task was
                    success: TaskStatus.Success
                    stopped: TaskStatus.Stopped
                    aborted: TaskStatus.Aborted
                    raised exception: TaskStatus.Error
                data the task data, store the workflow data store, signal the
                task signal and context the task context.
            force_run (bool): Run the task even if it is flagged to be skipped.
            propagate_skip (bool): Propagate the skip flag to the next task.
        """
        super().__init__(name, queue=queue,
                         callback_init=callback_init,
                         callback_finally=callback_finally,
                         force_run=force_run, propagate_skip=propagate_skip)

        self.params = TaskParameters(paths=paths,
                                     permission=permission,
                                     recursive=recursive,
                                     only_dirs=only_dirs)

    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the ChmodTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                predecessor task.
            store (DataStoreDocument): The persistent data store object that
                allows the task to store data for access across the current
                workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the
                construction and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.
            LightflowFilesystemChmodError: If an error occurred while the
                ownership is set

        Returns:
            Action: An Action object containing the data that should be passed
                on to the next task and optionally a list of successor tasks
                that should be executed.
        """
        params = self.params.eval(data, store)

        # interpret the permission string (e.g. '755') as an octal mode
        path_perm = int(params.permission, 8)
        paths = [params.paths] if isinstance(params.paths, str) else params.paths

        for path in paths:
            # bug fix: validate every path up front. The check used to sit
            # inside the directory branch only, letting relative file paths
            # through despite the documented LightflowFilesystemPathError.
            if not os.path.isabs(path):
                raise LightflowFilesystemPathError(
                    'The specified path is not an absolute path')

            if os.path.isdir(path):
                try:
                    # set the permission for the root directory
                    os.chmod(path, path_perm)

                    # get the files and sub-directories
                    if params.recursive:
                        dir_tree = os.walk(path, topdown=False)
                    else:
                        dir_tree = [(path, [], [
                            f for f in os.listdir(path)
                            if os.path.isfile(os.path.join(path, f))
                        ])]

                    # iterate over the directory tree and set the POSIX
                    # permissions
                    for root, dirs, files in dir_tree:
                        if not params.only_dirs:
                            for name in files:
                                os.chmod(os.path.join(root, name), path_perm)

                        for name in dirs:
                            os.chmod(os.path.join(root, name), path_perm)

                # FileNotFoundError is a subclass of OSError, so catching
                # OSError alone covers both. Bug fix: the exception was
                # constructed but never raised, silently swallowing failures.
                except OSError as e:
                    raise LightflowFilesystemChmodError(e)
            else:
                try:
                    os.chmod(path, path_perm)
                except OSError as e:
                    # bug fix: previously constructed but never raised
                    raise LightflowFilesystemChmodError(e)

        # bug fix: return the Action promised by the docstring, consistent
        # with every other filesystem task in this module
        return Action(data)