def plot_image(data, store, signal, context):
    ''' Plot the image and save it to a file.'''
    plot_path = "/home/xf11bm/test.png"
    img = data['image']
    print(img)
    print(np)
    plt.figure(0)
    plt.clf()
    plt.imshow(np.log(img))
    plt.savefig(plot_path)


# create the main DAG that spawns others
main_dag = Dag('main_dag')
main_task = PythonTask(name="main", callback=main)
main_dag.define({
    main_task: None,
})

from SciStreams.XS_Streams import filter_attributes, pick_allowed_detectors

# the secondary circular average task
# create the tasks for storing, grabbing and plotting the image data
put_task = PythonTask(name='put_task', callback=put_data)
grab_image_task = PythonTask(name='grab_image', callback=grab_image)
plot_image_task = PythonTask(name='plot_image', callback=plot_image)

# set up the graph of the DAG
circavg_dag_dict = {
# dataset in the list of all datasets. The second dataset is referenced by its index==1.
def multiply_data(data, store, signal, context):
    data['value'] = data['value'] * data.get_by_index(1)['value']


# subtract two values by using the aliases of the two datasets and different functions
# for illustration purposes: get_by_alias() and the shorthand call notation (data(alias))
def subtract_data(data, store, signal, context):
    data['value'] = data.get_by_alias('first')['value'] - data('second')['value']


# create the main DAG based on the diagram above
d = Dag('main_dag')

put_task = PythonTask(name='put_task', callback=put_data)
square_task = PythonTask(name='square_task', callback=square_data)
multiply_task = PythonTask(name='multiply_task', callback=multiply_data)
subtract_task = PythonTask(name='subtract_task', callback=subtract_data)
print_task_1 = PythonTask(name='print_task_1', callback=print_data)
print_task_2 = PythonTask(name='print_task_2', callback=print_data)
print_task_3 = PythonTask(name='print_task_3', callback=print_data)
print_task_4 = PythonTask(name='print_task_4', callback=print_data)

d.define({
    put_task: {
        print_task_1: None,
        square_task: None,
        multiply_task: None,
        subtract_task: 'first'
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks
def print_info(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# task that branches the processing out into three parallel lanes
branch_task = PythonTask(name='branch_task', callback=print_info)

# first task, first lane
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_info)

# first task, second lane
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_info)

# first task, third lane
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_info)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_info)

# set up the graph of the DAG as illustrated above. Please note how a list of tasks
# defines tasks that are run in parallel (branched out).
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the task that stores the value 5
def put_data(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    data['value'] = 5


# the callback function for the task that prints the data
def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)
print_task = PythonTask(name='print_task', callback=print_value)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
d.define({put_task: print_task})
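# Note: the data object handed to each callback travels along the edge of the graph,
# so the value stored by put_task is available to print_task and running this workflow
# prints "The value is: 5".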
    # give it some provenance and data
    new_data = dict(img=img)
    new_data['md'] = md.copy()
    new_data = TaskData(data=new_data)
    new_data = MultiTaskData(dataset=new_data)

    good_attr = filter_attributes(new_data['md'])
    if good_attr:
        print("got a good image")
        # one image dags should go here
        dag_name = signal.start_dag(one_image_dag, data=new_data)
        print("primary node, dag name: {}".format(dag_name))
        dag_names.append(dag_name)
    else:
        print("Bad attributes!")

    signal.join_dags(dag_names)


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
primary_task = PythonTask(name="primary_task", callback=primary_func,
                          queue='cms-primary-task')

primary_dag_dict = {
    primary_task: None,
}
primary_dag = Dag("primary_dag", autostart=True, queue='cms-primary')
primary_dag.define(primary_dag_dict)
# print tasks in lane 1 and lane 2. The successor tasks can be specified by either their
# name or the task object itself. Both methods are shown here.
def branch_with_limit(data, store, signal, context):
    return Action(data, limit=[lane1_print_task, 'lane2_print_task'])


# the callback function for tasks that print the data
def print_value(data, store, signal, context):
    print('Task {} and value {}'.format(context.task_name, data['value']))


# create the main DAG
d = Dag('main_dag')

# task for storing the data
put_task = PythonTask(name='put_task', callback=put_data)

# task that limits the branching to certain successor tasks
branch_task = PythonTask(name='branch_task', callback=branch_with_limit)

# first task, first lane, simply prints the value stored in the put_task
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_value)

# first task, second lane, simply prints the value stored in the put_task
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_value)

# first task, third lane, simply prints the value stored in the put_task
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_value)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_value)
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task', callback=decide_on_successor)
small_number_task = PythonTask(name='small_number_task', callback=print_small_number)
large_number_task = PythonTask(name='large_number_task', callback=print_large_number)

# create the main DAG
d = Dag('main_dag')
d.define({decision_task: [small_number_task, large_number_task]})
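# Note: returning an Action with a 'limit' list restricts which successor tasks are run,
# so on each run only one of small_number_task or large_number_task is executed and the
# other branch is skipped.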
def stop(data, store, signal, context):
    raise StopTask('Stop task {} and all successor tasks'.format(context.task_name))


# callback for printing the current task context
def print_context(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

start_task = PythonTask(name='start_task', callback=start_all)

bash_task = BashTask(name='bash_task',
                     command='for i in `seq 1 10`; do echo "$i"; done',
                     callback_stdout=bash_stdout)

stop_noskip_task = PythonTask(name='stop_noskip_task', callback=stop_noskip)
stop_task = PythonTask(name='stop_task', callback=stop)

print_task_1 = PythonTask(name='print_task_1', callback=print_context)
print_task_2 = PythonTask(name='print_task_2', callback=print_context)
print_task_3 = PythonTask(name='print_task_3', callback=print_context)
from lightflow.models import Dag, Parameters, Option
from lightflow.tasks import PythonTask
from lightflow.models.task_data import TaskData, MultiTaskData


def test_func(data, store, signal, context):
    import logging
    logging.basicConfig(filename='/home/xf11bm/SciStreams/SciStreams/test.log',
                        level=logging.DEBUG)
    logging.debug('Testing a log write again')
    #logging.info('So should this')
    #logging.warning('And this, too')


test_task = PythonTask(name="test func", callback=test_func, queue='test')

test_dag_dict = {
    test_task: None,
}
test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
    dag_names = []
    for i in range(5):
        sleep(1)
        data['image'] = np.ones((100, 100))
        started_dag = signal.start_dag(sub_dag, data=data)
        dag_names.append(started_dag)

    signal.join_dags(dag_names)


# this callback function prints the dimensions of the received numpy array
def sub_dag_print(data, store, signal, context):
    print('Received an image with dimensions: {}'.format(data['image'].shape))


init_task = PythonTask(name='init_task', callback=print_name)
call_dag_task = PythonTask(name='call_dag_task', callback=start_sub_dag)

# create the main dag that runs the init task first, followed by the call_dag task.
main_dag = Dag('main_dag')
main_dag.define({init_task: call_dag_task})

# create the tasks for the sub dag that simply prints the shape of the numpy array
# passed down from the main dag.
print_task = PythonTask(name='print_task', callback=sub_dag_print)

# create the sub dag that is being called by the main dag. In order to stop the
# system from automatically starting the dag when the workflow is run, set the autostart
# parameter to false.
sub_dag = Dag('sub_dag', autostart=False)
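# Note: as used above, signal.start_dag returns an identifier for the dag run it started,
# and signal.join_dags blocks until all of the listed dag runs have finished.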
def make_list(data, store, signal, context):
    print(context.task_name)
    data['my_list'] = ['asdf_0001.dat', 'asdf_0002.dat', 'sdfa_0001.dat',
                       'sdfa_0002.dat', 'sdfa_0003.dat', 'blah_0001.dat', '|',
                       'blah_0002.dat', 'blah2_0001.dat']


def print_list(data, store, signal, context):
    print(context.task_name)
    print('==================================')
    print(data['my_list'])
    print('==================================')


print_dag = Dag('print_dag', autostart=False)
print_list_task = PythonTask(name='print_list', callback=print_list)
print_dag.define({print_list_task: None})

chunk_dag = Dag('chunk_dag')
make_list_task = PythonTask(name='make_list', callback=make_list)

chunk_task = ChunkingTask(name='chunk_me', dag_name='print_dag',
                          force_consecutive=True, flush_on_end=False,
                          match_pattern='(?P<match>[0-9A-Za-z]*)_', in_key='my_list')
chunk_task2 = ChunkingTask(name='chunk_me2', dag_name='print_dag',
                           force_consecutive=True, flush_on_end=False,
                           match_pattern='[0-9A-Za-z]*_', in_key='my_list')
array, and as this is not the case aborts the workflow gracefully. The abort is
accomplished by raising the AbortWorkflow exception.
"""
from lightflow.models import Dag, AbortWorkflow
from lightflow.tasks import PythonTask


# the callback function for the task that stores the array of three image file names
def collect_data(data, store, signal, context):
    data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif']


# the callback function for the task that checks the number of stored file names
def check_data(data, store, signal, context):
    if len(data['images']) < 5:
        raise AbortWorkflow('At least 5 images are required')


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and checking data
collect_task = PythonTask(name='collect_task', callback=collect_data)
check_task = PythonTask(name='check_task', callback=check_data)

# set up the graph of the DAG
d.define({collect_task: check_task})
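# Note: collect_data stores only three filenames, so check_data raises AbortWorkflow
# and the workflow ends gracefully at the check_task.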
    lightflow worker start -q special
"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# create the two tasks, where the first task is executed on the 'task' queue and the
# second task on the 'special' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_special = PythonTask(name='print_special', callback=print_text, queue='special')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_special task.
d.define({print_task: print_special})
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    if 'value' not in data:
        data['value'] = 0

    data['value'] = data['value'] + 1
    print('This is task #{}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the 3 tasks that increment a number
task_1 = PythonTask(name='task_1', callback=inc_number)
task_2 = PythonTask(name='task_2', callback=inc_number)
task_3 = PythonTask(name='task_3', callback=inc_number)

# set up the graph of the DAG as a linear sequence of tasks
d.define({
    task_1: task_2,
    task_2: task_3
})
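# Note: since the tasks form a linear chain, the same 'value' entry is passed from task
# to task and incremented each time, so the three tasks print 1, 2 and 3 in turn.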
    data['pv_name'] = pvname
    data['pv_value'] = value
    signal.start_dag('pv_action_dag', data=data)
    return data


# the callback function that prints the new PV value after it has changed.
def pv_printout(data, store, signal, context):
    print('PV {} has value: {}'.format(data['pv_name'], data['pv_value']))


# set up the PV monitoring dag.
pv_monitor_dag = Dag('pv_monitor_dag')

startup_task = PythonTask(name='startup_task', callback=startup)

monitor_task = PvTriggerTask(name='monitor_task',
                             pv_name=lambda data, data_store: data_store.get('pvname'),
                             callback=pv_callback,
                             event_trigger_time=0.1,
                             stop_polling_rate=2,
                             skip_initial_callback=True)

pv_monitor_dag.define({startup_task: monitor_task})

# set up the PV action dag.
pv_action_dag = Dag('pv_action_dag', autostart=False)

printout_task = PythonTask(name='printout_task',
# the callback function that is called as soon as new lines are appended to the text
# file. It stores the new lines into the data and starts the 'print_dag' dag.
def start_print_dag(lines, data, store, signal, context):
    data['lines'] = lines
    signal.start_dag('print_dag', data=data)


# the callback for printing the new lines from the 'print_dag' dag.
def print_lines(data, store, signal, context):
    print('\n'.join(data['lines']))


# create the task that watches for newly appended lines and the associated dag.
new_line_task = NewLineTriggerTask(name='new_line_task',
                                   path='/tmp/lightflow_test/watch_lines.txt',
                                   callback=start_print_dag,
                                   aggregate=None,
                                   use_existing=False,
                                   flush_existing=False)

list_dag = Dag('line_dag')
list_dag.define({new_line_task: None})

# create the print dag and set its autostart value to false.
print_task = PythonTask(name='print_task', callback=print_lines)

print_dag = Dag('print_dag', autostart=False)
print_dag.define({print_task: None})
# acquire some basic statistics for each file as long as it is not a symbolic link
def acquire_stats(entry, data, store, signal, context):
    if not entry.is_symlink():
        data['count'] += 1
        data['size'] += entry.stat(follow_symlinks=False).st_size


# print the acquired statistics
def print_stats(data, store, signal, context):
    print('Statistics for folder: {}'.format(store.get('path')))
    print('Number of files: {}'.format(data['count']))
    print('Total size (bytes): {}'.format(data['size']))


# the task for setting up the data for the workflow
setup_task = PythonTask(name='setup_task', callback=setup)

# traverse a directory and call the statistics callable for each file
walk_task = WalkTask(name='walk_task',
                     path=lambda data, store: store.get('path'),
                     callback=acquire_stats,
                     recursive=True)

# print the acquired statistics
print_task = PythonTask(name='print_task', callback=print_stats)

# create a DAG that runs the setup, walk and print tasks consecutively.
main_dag = Dag('main_dag')
main_dag.define({setup_task: walk_task, walk_task: print_task})
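# Note: the setup callback is not shown in this excerpt; it is expected to store the
# 'path' value and to initialise data['count'] and data['size'], otherwise acquire_stats
# would fail with a KeyError.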
def random_sleep(data, store, signal, context):
    sleep(random() * 4)


# the callback function for the task that prints the run times
def print_times(data, store, signal, context):
    dag_log = store.get(key='log.{}'.format(context.dag_name),
                        section=DataStoreDocumentSection.Meta)
    for task, fields in dag_log.items():
        print(task, 'on', fields['worker'], 'took', fields['duration'], 'seconds')


# create the main DAG
d = Dag('main_dag')

# create the sleep tasks
sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep)
sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep)
sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep)

# create the print task
print_task = PythonTask(name='print_task', callback=print_times)

# set up the DAG
d.define({
    sleep_task_1: sleep_task_2,
    sleep_task_2: sleep_task_3,
    sleep_task_3: print_task
})
                           interp_base=request['processing_info']['interp_base'])
    elif process_type == 'bin':
        logger.info("binning (not performed yet)")
        processor.bin(start_doc,
                      requester=request['requester'],
                      proc_info=request['processing_info'],
                      filepath=request['processing_info']['filepath'])
    elif process_type == 'request_interpolated_data':
        logger.info("returning interpolated data (not done yet)")
        processor.return_interp_data(start_doc,
                                     requester=request['requester'],
                                     filepath=request['processing_info']['filepath'])

    t2 = ttime.time()
    print(f"total processing took {t2-t1} sec")


# don't create the request anymore
#create_req_task = PythonTask(name="create_req_func", callback=create_req_func,
#                             queue='qas-task')
process_run_task = PythonTask(name="process_run_func", callback=process_run_func,
                              queue='qas-task')

d = Dag("interpolation", queue="qas-dag")
d.define({
    process_run_task: None,
})
            data['descriptor'] = descriptor_dict[event['descriptor']]
            dag_name = signal.start_dag(primary_dag, data=data)
            print("dag name: {}".format(dag_name))
            #dag_names.append(dag_name)
            # I will join after every send for debugging
            signal.join_dags([dag_name])
            # ignore maxnum for now
            #if MAXNUM is not None and cnt > MAXNUM:
            #    break

    print("Main job submission finished, found {} images".format(cnt))
    #signal.join_dags(dag_names)


def make_descriptor_dict(descriptors):
    desc_dict = dict()
    for descriptor in descriptors:
        desc_dict[descriptor['uid']] = descriptor
    return desc_dict


# create the main DAG that spawns others
main_dag = Dag('main_dag', queue='cms-main')
main_task = PythonTask(name="main_task", callback=main_func, queue="cms-main-task")

main_dag.define({
    main_task: None,
})
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])


# the callback function that prints the value of the filepath parameter
def print_filepath(data, store, signal, context):
    print('The filepath is:', store.get('filepath'))


# the callback function that prints the value of the iterations parameter
def print_iterations(data, store, signal, context):
    print('Number of iterations:', store.get('iterations'))


# create the main DAG
d = Dag('main_dag')

# task for printing the value of the filepath parameter
print_filepath_task = PythonTask(name='print_filepath_task', callback=print_filepath)

# task for printing the value of the iterations parameter
print_iterations_task = PythonTask(name='print_iterations_task', callback=print_iterations)

# set up the graph of the DAG, in which the print_filepath_task has to be executed first,
# followed by the print_iterations_task.
d.define({print_filepath_task: print_iterations_task})
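# Note (assumption, not part of the original example): with the standard lightflow
# command line interface the parameter values are typically supplied as key=value
# arguments when the workflow is started, e.g. something like:
#   lightflow workflow start <workflow_name> filepath=/tmp/data iterations=3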
        dag_name = dag_names.popleft()
        if len(dag_names) == 0:
            stopped = True
        else:
            time.sleep(.1)
    #signal.join_dags(dag_names)


def subsub_func(data, store, signal, context):
    print("completed")


main_task = PythonTask(name="main_task", callback=main_func, queue='cms-main-task')

main_dag_dict = {
    main_task: None,
}
main_dag = Dag("main_dag", autostart=True, queue='cms-main')
main_dag.define(main_dag_dict)

sub_task = PythonTask(name="test_task", callback=sub_func, queue='cms-primary-task')

sub_dag_dict = {
    sub_task: None,
}
def add_filename(data, store, signal, context):
    store.push('filenames', 'file_b.spec')


# the callback function for the task that adds a nested list to the list of filenames and
# then extends the list of filenames with two more entries.
def add_more_filenames(data, store, signal, context):
    store.push('filenames', ['nested_a', 'nested_b'])
    store.extend('filenames', ['file_c.spec', 'file_d.spec'])


# create the main DAG
d = Dag('main_dag')

# create the tasks that call the functions above
store_task = PythonTask(name='store_task', callback=store_data)
modify_task = PythonTask(name='modify_task', callback=modify_data)
add_filename_task = PythonTask(name='add_filename_task', callback=add_filename)
add_more_filename_task = PythonTask(name='add_more_filename_task',
                                    callback=add_more_filenames)

# set up the graph of the DAG, in which the store_task and modify_task are called
# in sequence while the add_filename_task and add_more_filename_task are run in parallel.
d.define({
    store_task: modify_task,
    modify_task: [add_filename_task, add_more_filename_task]
})
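# Illustrative sketch (not part of the original example): a callback such as the
# hypothetical one below could be appended after the parallel tasks to read the
# aggregated list back from the persistent data store.
def print_stored_filenames(data, store, signal, context):
    # 'filenames' was filled by the callbacks above via store.push()/store.extend()
    print(store.get('filenames'))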
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import GlobTask


# the callback function that handles the returned files from the glob task. In this
# example it stores them as a list into the data under the key 'files'.
def store_files(files, data, store, signal, context):
    data['files'] = files


# the callback for the task that prints the filenames that were returned by the glob task.
def print_filenames(data, store, signal, context):
    print('\n'.join(data['files']))


# create a GlobTask to find all files with the '.file' extension and a PythonTask to
# print the result.
glob_task = GlobTask(name='glob_task',
                     paths=['/tmp/lightflow_test/'],
                     callback=store_files,
                     pattern='**/*.file',
                     recursive=True)

print_task = PythonTask(name='print_task', callback=print_filenames)

# create a DAG that runs the glob task first and then the print task.
list_dag = Dag('list_dag')
list_dag.define({glob_task: print_task})
from lightflow.models.task_data import TaskData, MultiTaskData

# TODO : make callback something else callback
# from databroker import Broker
import matplotlib.pyplot as plt
import numpy as np

from SciStreams.config import config

config['foo'] = 'bar'


def test_func(data, store, signal, context):
    print("printing config\n\n")
    print(config['foo'])
    print("done\n\n")
    config['foo'] = 'far'


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
test_task = PythonTask(name="main", callback=test_func)
test_task2 = PythonTask(name="main2", callback=test_func)

test_dag_dict = {
    test_task: test_task2,
}
test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
worker consuming the 'main' queue, the DAG and the print_task on the second worker, and
the print_memory task on the third worker.
"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))


# create the main DAG and have it scheduled on the 'graph' queue
d = Dag('main_dag', queue='graph')

# create the two tasks, where the first task is executed on the default 'task' queue
# while the second task is processed on the 'high_memory' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_memory = PythonTask(name='print_memory', callback=print_text, queue='high_memory')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_memory task.
d.define({print_task: print_memory})
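# Note (deployment assumption, not part of the example): for all tasks to run, workers
# have to consume the 'main', 'graph', 'task' and 'high_memory' queues, e.g. by starting
# additional workers with 'lightflow worker start -q <queue>' as in the queue example above.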