def plot_image(data, store, signal, context):
    ''' Plot the image and save it to a file.'''
    plot_path = "/home/xf11bm/test.png"
    img = data['image']
    print(img)
    print(np)
    plt.figure(0)
    plt.clf()
    plt.imshow(np.log(img))
    plt.savefig(plot_path)


# create the main DAG that spawns others
main_dag = Dag('main_dag')
main_task = PythonTask(name="main", callback=main)
main_dag.define({
    main_task: None,
})

from SciStreams.XS_Streams import filter_attributes, pick_allowed_detectors

# the secondary circular average task
# create the tasks for storing, grabbing and plotting the image data
put_task = PythonTask(name='put_task', callback=put_data)
grab_image_task = PythonTask(name='grab_image', callback=grab_image)
plot_image_task = PythonTask(name='plot_image', callback=plot_image)

# set up the graph of the DAG
circavg_dag_dict = {
# dataset in the list of all datasets. The second dataset is referenced by its index==1.
def multiply_data(data, store, signal, context):
    data['value'] = data['value'] * data.get_by_index(1)['value']


# subtract two values by using the aliases of the two datasets and different functions
# for illustration purposes: get_by_alias() and the shorthand call notation (data(alias))
def subtract_data(data, store, signal, context):
    data['value'] = data.get_by_alias('first')['value'] - data('second')['value']


# create the main DAG based on the diagram above
d = Dag('main_dag')

put_task = PythonTask(name='put_task', callback=put_data)
square_task = PythonTask(name='square_task', callback=square_data)
multiply_task = PythonTask(name='multiply_task', callback=multiply_data)
subtract_task = PythonTask(name='subtract_task', callback=subtract_data)
print_task_1 = PythonTask(name='print_task_1', callback=print_data)
print_task_2 = PythonTask(name='print_task_2', callback=print_data)
print_task_3 = PythonTask(name='print_task_3', callback=print_data)
print_task_4 = PythonTask(name='print_task_4', callback=print_data)

d.define({
    put_task: {
        print_task_1: None,
        square_task: None,
        multiply_task: None,
        subtract_task: 'first'
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks
def print_info(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# task that branches the processing out into three parallel lanes
branch_task = PythonTask(name='branch_task', callback=print_info)

# first task, first lane
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_info)

# first task, second lane
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_info)

# first task, third lane
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_info)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_info)

# set up the graph of the DAG as illustrated above. Please note how a list of tasks
# defines tasks that are run in parallel (branched out).
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the task that stores the value 5
def put_data(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    data['value'] = 5


# the callback function for the task that prints the data
def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)
print_task = PythonTask(name='print_task', callback=print_value)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
d.define({put_task: print_task})
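# Note: the data object handed to each callback travels along the edge of the graph,
# so the value stored by put_task is available to print_task and running this workflow
# prints "The value is: 5".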
    # give it some provenance and data
    new_data = dict(img=img)
    new_data['md'] = md.copy()
    new_data = TaskData(data=new_data)
    new_data = MultiTaskData(dataset=new_data)

    good_attr = filter_attributes(new_data['md'])
    if good_attr:
        print("got a good image")
        # one image dags should go here
        dag_name = signal.start_dag(one_image_dag, data=new_data)
        print("primary node, dag name: {}".format(dag_name))
        dag_names.append(dag_name)
    else:
        print("Bad attributes!")

    signal.join_dags(dag_names)


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
primary_task = PythonTask(name="primary_task", callback=primary_func,
                          queue='cms-primary-task')

primary_dag_dict = {
    primary_task: None,
}
primary_dag = Dag("primary_dag", autostart=True, queue='cms-primary')
primary_dag.define(primary_dag_dict)
# print tasks in lane 1 and lane 2. The successor tasks can be specified by either their
# name or the task object itself. Both methods are shown here.
def branch_with_limit(data, store, signal, context):
    return Action(data, limit=[lane1_print_task, 'lane2_print_task'])


# the callback function for tasks that print the data
def print_value(data, store, signal, context):
    print('Task {} and value {}'.format(context.task_name, data['value']))


# create the main DAG
d = Dag('main_dag')

# task for storing the data
put_task = PythonTask(name='put_task', callback=put_data)

# task that limits the branching to certain successor tasks
branch_task = PythonTask(name='branch_task', callback=branch_with_limit)

# first task, first lane, simply prints the value stored in the put_task
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_value)

# first task, second lane, simply prints the value stored in the put_task
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_value)

# first task, third lane, simply prints the value stored in the put_task
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_value)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_value)
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task', callback=decide_on_successor)
small_number_task = PythonTask(name='small_number_task', callback=print_small_number)
large_number_task = PythonTask(name='large_number_task', callback=print_large_number)

# create the main DAG
d = Dag('main_dag')
d.define({decision_task: [small_number_task, large_number_task]})
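# Note: returning an Action with a 'limit' list restricts which successor tasks are run,
# so on each run only one of small_number_task or large_number_task is executed and the
# other branch is skipped.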
def stop(data, store, signal, context):
    raise StopTask('Stop task {} and all successor tasks'.format(context.task_name))


# callback for printing the current task context
def print_context(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

start_task = PythonTask(name='start_task', callback=start_all)

bash_task = BashTask(name='bash_task',
                     command='for i in `seq 1 10`; do echo "$i"; done',
                     callback_stdout=bash_stdout)

stop_noskip_task = PythonTask(name='stop_noskip_task', callback=stop_noskip)
stop_task = PythonTask(name='stop_task', callback=stop)

print_task_1 = PythonTask(name='print_task_1', callback=print_context)
print_task_2 = PythonTask(name='print_task_2', callback=print_context)
print_task_3 = PythonTask(name='print_task_3', callback=print_context)
from lightflow.models import Dag, Parameters, Option
from lightflow.tasks import PythonTask
from lightflow.models.task_data import TaskData, MultiTaskData


def test_func(data, store, signal, context):
    import logging
    logging.basicConfig(filename='/home/xf11bm/SciStreams/SciStreams/test.log',
                        level=logging.DEBUG)
    logging.debug('Testing a log write again')
    #logging.info('So should this')
    #logging.warning('And this, too')


test_task = PythonTask(name="test func", callback=test_func, queue='test')

test_dag_dict = {
    test_task: None,
}
test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
    dag_names = []
    for i in range(5):
        sleep(1)
        data['image'] = np.ones((100, 100))
        started_dag = signal.start_dag(sub_dag, data=data)
        dag_names.append(started_dag)

    signal.join_dags(dag_names)


# this callback function prints the dimensions of the received numpy array
def sub_dag_print(data, store, signal, context):
    print('Received an image with dimensions: {}'.format(data['image'].shape))


init_task = PythonTask(name='init_task', callback=print_name)
call_dag_task = PythonTask(name='call_dag_task', callback=start_sub_dag)

# create the main dag that runs the init task first, followed by the call_dag task.
main_dag = Dag('main_dag')
main_dag.define({init_task: call_dag_task})

# create the tasks for the sub dag that simply prints the shape of the numpy array
# passed down from the main dag.
print_task = PythonTask(name='print_task', callback=sub_dag_print)

# create the sub dag that is being called by the main dag. In order to stop the
# system from automatically starting the dag when the workflow is run, set the autostart
# parameter to false.
sub_dag = Dag('sub_dag', autostart=False)
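# Note: as used above, signal.start_dag returns an identifier for the dag run it started,
# and signal.join_dags blocks until all of the listed dag runs have finished.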
def make_list(data, store, signal, context):
    print(context.task_name)
    data['my_list'] = ['asdf_0001.dat', 'asdf_0002.dat', 'sdfa_0001.dat',
                       'sdfa_0002.dat', 'sdfa_0003.dat', 'blah_0001.dat', '|',
                       'blah_0002.dat', 'blah2_0001.dat']


def print_list(data, store, signal, context):
    print(context.task_name)
    print('==================================')
    print(data['my_list'])
    print('==================================')


print_dag = Dag('print_dag', autostart=False)
print_list_task = PythonTask(name='print_list', callback=print_list)
print_dag.define({print_list_task: None})

chunk_dag = Dag('chunk_dag')
make_list_task = PythonTask(name='make_list', callback=make_list)

chunk_task = ChunkingTask(name='chunk_me', dag_name='print_dag',
                          force_consecutive=True, flush_on_end=False,
                          match_pattern='(?P<match>[0-9A-Za-z]*)_', in_key='my_list')
chunk_task2 = ChunkingTask(name='chunk_me2', dag_name='print_dag',
                           force_consecutive=True, flush_on_end=False,
                           match_pattern='[0-9A-Za-z]*_', in_key='my_list')
array, and as this is not the case aborts the workflow gracefully. The abort is
accomplished by raising the AbortWorkflow exception.
"""
from lightflow.models import Dag, AbortWorkflow
from lightflow.tasks import PythonTask


# the callback function for the task that stores the array of three image file names
def collect_data(data, store, signal, context):
    data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif']


# the callback function for the task that checks the number of stored file names
def check_data(data, store, signal, context):
    if len(data['images']) < 5:
        raise AbortWorkflow('At least 5 images are required')


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and checking data
collect_task = PythonTask(name='collect_task', callback=collect_data)
check_task = PythonTask(name='check_task', callback=check_data)

# set up the graph of the DAG
d.define({collect_task: check_task})
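# Note: collect_data stores only three filenames, so check_data raises AbortWorkflow
# and the workflow ends gracefully at the check_task.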
    lightflow worker start -q special
"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# create the two tasks, where the first task is executed on the 'task' queue and the
# second task on the 'special' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_special = PythonTask(name='print_special', callback=print_text, queue='special')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_special task.
d.define({print_task: print_special})
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    if 'value' not in data:
        data['value'] = 0

    data['value'] = data['value'] + 1
    print('This is task #{}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the 3 tasks that increment a number
task_1 = PythonTask(name='task_1', callback=inc_number)
task_2 = PythonTask(name='task_2', callback=inc_number)
task_3 = PythonTask(name='task_3', callback=inc_number)

# set up the graph of the DAG as a linear sequence of tasks
d.define({
    task_1: task_2,
    task_2: task_3
})
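# Note: since the tasks form a linear chain, the same 'value' entry is passed from task
# to task and incremented each time, so the three tasks print 1, 2 and 3 in turn.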
    data['pv_name'] = pvname
    data['pv_value'] = value
    signal.start_dag('pv_action_dag', data=data)
    return data


# the callback function that prints the new PV value after it has changed.
def pv_printout(data, store, signal, context):
    print('PV {} has value: {}'.format(data['pv_name'], data['pv_value']))


# set up the PV monitoring dag.
pv_monitor_dag = Dag('pv_monitor_dag')

startup_task = PythonTask(name='startup_task', callback=startup)

monitor_task = PvTriggerTask(name='monitor_task',
                             pv_name=lambda data, data_store: data_store.get('pvname'),
                             callback=pv_callback,
                             event_trigger_time=0.1,
                             stop_polling_rate=2,
                             skip_initial_callback=True)

pv_monitor_dag.define({startup_task: monitor_task})

# set up the PV action dag.
pv_action_dag = Dag('pv_action_dag', autostart=False)

printout_task = PythonTask(name='printout_task',
# the callback function that is called as soon as new lines are appended to the text
# file. It stores the new lines into the data and starts the 'print_dag' dag.
def start_print_dag(lines, data, store, signal, context):
    data['lines'] = lines
    signal.start_dag('print_dag', data=data)


# the callback for printing the new lines from the 'print_dag' dag.
def print_lines(data, store, signal, context):
    print('\n'.join(data['lines']))


# create the task that watches for newly appended lines and the associated dag.
new_line_task = NewLineTriggerTask(name='new_line_task',
                                   path='/tmp/lightflow_test/watch_lines.txt',
                                   callback=start_print_dag,
                                   aggregate=None,
                                   use_existing=False,
                                   flush_existing=False)

list_dag = Dag('line_dag')
list_dag.define({new_line_task: None})

# create the print dag and set its autostart value to false.
print_task = PythonTask(name='print_task', callback=print_lines)

print_dag = Dag('print_dag', autostart=False)
print_dag.define({print_task: None})
# acquire some basic statistics for each file as long as it is not a symbolic link
def acquire_stats(entry, data, store, signal, context):
    if not entry.is_symlink():
        data['count'] += 1
        data['size'] += entry.stat(follow_symlinks=False).st_size


# print the acquired statistics
def print_stats(data, store, signal, context):
    print('Statistics for folder: {}'.format(store.get('path')))
    print('Number of files: {}'.format(data['count']))
    print('Total size (bytes): {}'.format(data['size']))


# the task for setting up the data for the workflow
setup_task = PythonTask(name='setup_task', callback=setup)

# traverse a directory and call the statistics callable for each file
walk_task = WalkTask(name='walk_task',
                     path=lambda data, store: store.get('path'),
                     callback=acquire_stats,
                     recursive=True)

# print the acquired statistics
print_task = PythonTask(name='print_task', callback=print_stats)

# create a DAG that runs the setup, walk and print tasks consecutively.
main_dag = Dag('main_dag')
main_dag.define({setup_task: walk_task, walk_task: print_task})
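# Note: the setup callback is not shown in this excerpt; it is expected to store the
# 'path' value and to initialise data['count'] and data['size'], otherwise acquire_stats
# would fail with a KeyError.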
def random_sleep(data, store, signal, context):
    sleep(random() * 4)


# the callback function for the task that prints the run times
def print_times(data, store, signal, context):
    dag_log = store.get(key='log.{}'.format(context.dag_name),
                        section=DataStoreDocumentSection.Meta)
    for task, fields in dag_log.items():
        print(task, 'on', fields['worker'], 'took', fields['duration'], 'seconds')


# create the main DAG
d = Dag('main_dag')

# create the sleep tasks
sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep)
sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep)
sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep)

# create the print task
print_task = PythonTask(name='print_task', callback=print_times)

# set up the DAG
d.define({
    sleep_task_1: sleep_task_2,
    sleep_task_2: sleep_task_3,
    sleep_task_3: print_task
})
                           interp_base=request['processing_info']['interp_base'])
    elif process_type == 'bin':
        logger.info("binning (not performed yet)")
        processor.bin(start_doc,
                      requester=request['requester'],
                      proc_info=request['processing_info'],
                      filepath=request['processing_info']['filepath'])
    elif process_type == 'request_interpolated_data':
        logger.info("returning interpolated data (not done yet)")
        processor.return_interp_data(start_doc,
                                     requester=request['requester'],
                                     filepath=request['processing_info']['filepath'])

    t2 = ttime.time()
    print(f"total processing took {t2-t1} sec")


# don't create the request anymore
#create_req_task = PythonTask(name="create_req_func", callback=create_req_func,
#                             queue='qas-task')
process_run_task = PythonTask(name="process_run_func", callback=process_run_func,
                              queue='qas-task')

d = Dag("interpolation", queue="qas-dag")
d.define({
    process_run_task: None,
})
            data['descriptor'] = descriptor_dict[event['descriptor']]
            dag_name = signal.start_dag(primary_dag, data=data)
            print("dag name: {}".format(dag_name))
            #dag_names.append(dag_name)
            # I will join after every send for debugging
            signal.join_dags([dag_name])
            # ignore maxnum for now
            #if MAXNUM is not None and cnt > MAXNUM:
            #    break

    print("Main job submission finished, found {} images".format(cnt))
    #signal.join_dags(dag_names)


def make_descriptor_dict(descriptors):
    desc_dict = dict()
    for descriptor in descriptors:
        desc_dict[descriptor['uid']] = descriptor
    return desc_dict


# create the main DAG that spawns others
main_dag = Dag('main_dag', queue='cms-main')
main_task = PythonTask(name="main_task", callback=main_func, queue="cms-main-task")

main_dag.define({
    main_task: None,
})
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])


# the callback function that prints the value of the filepath parameter
def print_filepath(data, store, signal, context):
    print('The filepath is:', store.get('filepath'))


# the callback function that prints the value of the iterations parameter
def print_iterations(data, store, signal, context):
    print('Number of iterations:', store.get('iterations'))


# create the main DAG
d = Dag('main_dag')

# task for printing the value of the filepath parameter
print_filepath_task = PythonTask(name='print_filepath_task', callback=print_filepath)

# task for printing the value of the iterations parameter
print_iterations_task = PythonTask(name='print_iterations_task', callback=print_iterations)

# set up the graph of the DAG, in which the print_filepath_task has to be executed first,
# followed by the print_iterations_task.
d.define({print_filepath_task: print_iterations_task})
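# Note (assumption, not part of the original example): with the standard lightflow
# command line interface the parameter values are typically supplied as key=value
# arguments when the workflow is started, e.g. something like:
#   lightflow workflow start <workflow_name> filepath=/tmp/data iterations=3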
        dag_name = dag_names.popleft()
        if len(dag_names) == 0:
            stopped = True
        else:
            time.sleep(.1)
    #signal.join_dags(dag_names)


def subsub_func(data, store, signal, context):
    print("completed")


main_task = PythonTask(name="main_task", callback=main_func, queue='cms-main-task')

main_dag_dict = {
    main_task: None,
}
main_dag = Dag("main_dag", autostart=True, queue='cms-main')
main_dag.define(main_dag_dict)

sub_task = PythonTask(name="test_task", callback=sub_func, queue='cms-primary-task')

sub_dag_dict = {
    sub_task: None,
}
def add_filename(data, store, signal, context):
    store.push('filenames', 'file_b.spec')


# the callback function for the task that adds a nested list to the list of filenames and
# then extends the list of filenames with two more entries.
def add_more_filenames(data, store, signal, context):
    store.push('filenames', ['nested_a', 'nested_b'])
    store.extend('filenames', ['file_c.spec', 'file_d.spec'])


# create the main DAG
d = Dag('main_dag')

# create the tasks that call the functions above
store_task = PythonTask(name='store_task', callback=store_data)
modify_task = PythonTask(name='modify_task', callback=modify_data)
add_filename_task = PythonTask(name='add_filename_task', callback=add_filename)
add_more_filename_task = PythonTask(name='add_more_filename_task',
                                    callback=add_more_filenames)

# set up the graph of the DAG, in which the store_task and modify_task are called
# in sequence while the add_filename_task and add_more_filename_task are run in parallel.
d.define({
    store_task: modify_task,
    modify_task: [add_filename_task, add_more_filename_task]
})
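# Illustrative sketch (not part of the original example): a callback such as the
# hypothetical one below could be appended after the parallel tasks to read the
# aggregated list back from the persistent data store.
def print_stored_filenames(data, store, signal, context):
    # 'filenames' was filled by the callbacks above via store.push()/store.extend()
    print(store.get('filenames'))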
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import GlobTask


# the callback function that handles the returned files from the glob task. In this
# example it stores them as a list into the data under the key 'files'.
def store_files(files, data, store, signal, context):
    data['files'] = files


# the callback for the task that prints the filenames that were returned by the glob task.
def print_filenames(data, store, signal, context):
    print('\n'.join(data['files']))


# create a GlobTask to find all files with the '.file' extension and a PythonTask to
# print the result.
glob_task = GlobTask(name='glob_task',
                     paths=['/tmp/lightflow_test/'],
                     callback=store_files,
                     pattern='**/*.file',
                     recursive=True)

print_task = PythonTask(name='print_task', callback=print_filenames)

# create a DAG that runs the glob task first and then the print task.
list_dag = Dag('list_dag')
list_dag.define({glob_task: print_task})
from lightflow.models.task_data import TaskData, MultiTaskData

# TODO : make callback something else callback
# from databroker import Broker
import matplotlib.pyplot as plt
import numpy as np

from SciStreams.config import config

config['foo'] = 'bar'


def test_func(data, store, signal, context):
    print("printing config\n\n")
    print(config['foo'])
    print("done\n\n")
    config['foo'] = 'far'


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
test_task = PythonTask(name="main", callback=test_func)
test_task2 = PythonTask(name="main2", callback=test_func)

test_dag_dict = {
    test_task: test_task2,
}
test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
worker consuming the 'main' queue, the DAG and the print_task on the second worker, and
the print_memory task on the third worker.
"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG and have it scheduled on the 'graph' queue
d = Dag('main_dag', queue='graph')

# create the two tasks, where the first task is executed on the default 'task' queue
# while the second task is processed on the 'high_memory' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_memory = PythonTask(name='print_memory', callback=print_text, queue='high_memory')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_memory task.
d.define({print_task: print_memory})
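# Note (deployment assumption, not part of the example): for all tasks to run, workers
# have to consume the 'main', 'graph', 'task' and 'high_memory' queues, e.g. by starting
# additional workers with 'lightflow worker start -q <queue>' as in the queue example above.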