def test_pipeline_decrement_stage():

    p = Pipeline()
    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t
    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t
    p.add_stages([s1, s2])

    p._increment_stage()
    p._increment_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 2
    assert p._completed_flag.is_set() == True

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p._completed_flag.is_set() == False

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 0
    assert p._completed_flag.is_set() == False
def test_stage_task_addition():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks(set([t1, t2]))

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks
def get_pipeline(shared_fs=False, size=1):

    p = Pipeline()
    p.name = 'p'

    n = 4

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x

        # dd if=/dev/random bs=<byte size of a chunk> count=<number of chunks> of=<output file name>

        t.executable = 'dd'

        if not shared_fs:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s1_t%s.txt'%x]
        else:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=/home/vivek91/s1_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 1024

        s1.add_tasks(t)

    p.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=$NODE_LFS_PATH/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s2_t%s.txt'%x]
        else:
            t.arguments = ['if=/home/vivek91/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=/home/vivek91/s2_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p.add_stages(s2)

    return p
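
# A hedged driver sketch for get_pipeline() above: it is not part of the original
# example, and the resource description, hostname and port are placeholders. It only
# reuses the AppManager pattern that appears in the other examples in this file.
def run_lfs_example():

    from radical.entk import AppManager   # assumed import, as in the other examples

    p = get_pipeline(shared_fs=False, size=1)

    res_dict = {
        'resource': 'local.localhost',    # placeholder resource
        'walltime': 10,
        'cpus': 24,
        'project': ''
    }

    appman = AppManager(hostname=hostname, port=port)   # hostname/port assumed defined
    appman.resource_desc = res_dict
    appman.workflow = [p]
    appman.run()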
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()

    # Create a Task object
    t2 = Task()
    t2.executable = '/bin/bash'
    t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
    # Copy data from the task in the first stage to the current task's location
    t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()

    # Create a Task object
    t3 = Task()
    t3.executable = '/bin/bash'
    t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
    # Copy data from the task in the second stage to the current task's location
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' % (p.uid, s2.uid, t2.uid)]
    # Download the output of the current task to the current location
    t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
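
# Hedged usage sketch: 'cnt' in t3.download_output_data above is a free variable, so
# generate_pipeline() is presumably invoked from a loop that defines it. The driver
# below is an assumption (loop count, hostname/port and resource values are
# placeholders), reusing only the AppManager pattern seen elsewhere in this file.
if __name__ == '__main__':

    pipelines = list()
    for cnt in range(2):                         # 'cnt' is read by generate_pipeline()
        pipelines.append(generate_pipeline())

    appman = AppManager(hostname=hostname, port=port)   # assumed defined as above
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus': 2}
    appman.workflow = set(pipelines)
    appman.run()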
def test_stage_check_complete():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    assert s._check_stage_complete() == False
    s._set_tasks_state(states.DONE)
    assert s._check_stage_complete() == True
def test_stage_set_tasks_state():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    with pytest.raises(ValueError):
        s._set_tasks_state(2)

    s._set_tasks_state(states.DONE)
    assert t1.state == states.DONE
    assert t2.state == states.DONE
def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):

    mq_connection = pika.BlockingConnection(pika.ConnectionParameters(host=mq_hostname, port=port))
    mq_channel = mq_connection.channel()

    tasks = list()
    for _ in range(16):
        t = Task()
        t.state = states.SCHEDULING
        t.executable = '/bin/echo'
        tasks.append(t.to_dict())

    tasks_as_json = json.dumps(tasks)
    mq_channel.basic_publish(exchange='',
                             routing_key=pending_queue,
                             body=tasks_as_json)

    cnt = 0
    while cnt < 15:

        method_frame, props, body = mq_channel.basic_get(queue=completed_queue)
        if body:
            task = Task()
            task.from_dict(json.loads(body))
            if task.state == states.DONE:
                cnt += 1
            mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)

    mq_connection.close()
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name


    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s'%s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'        # Assign a name to the task (optional)
            t.executable = '/bin/echo'   # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s'%(t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
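
# Hedged driver sketch for generate_pipeline(name, stages) above: build a couple of
# pipelines and hand them to an AppManager as a set, mirroring the AppManager usage
# elsewhere in this file. Pipeline count and resource values are assumptions.
if __name__ == '__main__':

    p1 = generate_pipeline(name='pipeline-1', stages=1)
    p2 = generate_pipeline(name='pipeline-2', stages=2)

    appman = AppManager(hostname=hostname, port=port)   # assumed defined as above
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus': 2}
    appman.workflow = set([p1, p2])
    appman.run()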
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_wfp_initialization(s, i, b, l):

    p = Pipeline()
    st = Stage()
    t = Task()
    t.executable = ['/bin/date']
    st.add_tasks(t)
    p.add_stages(st)

    wfp = WFprocessor(sid='rp.session.local.0000',
                      workflow=set([p]),
                      pending_queue=['pending'],
                      completed_queue=['completed'],
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=True)

    assert len(wfp._uid.split('.')) == 2
    assert 'wfprocessor' == wfp._uid.split('.')[0]
    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._mq_hostname == hostname
    assert wfp._port == port
    assert wfp._wfp_process == None
    assert wfp._workflow == set([p])

    if not isinstance(s, unicode):
        wfp = WFprocessor(sid=s,
                          workflow=set([p]),
                          pending_queue=l,
                          completed_queue=l,
                          mq_hostname=s,
                          port=i,
                          resubmit_failed=b)
    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/date']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1
def test_pipeline_stage_addition():

    p = Pipeline()
    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t
    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t
    p.add_stages([s1, s2])

    assert type(p.stages) == list
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p.stages[0] == s1
    assert p.stages[1] == s2
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def test_wfp_workflow_incomplete():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED
    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_stage_task_assignment():
    """
    ***Purpose***: Test if the necessary attributes are automatically updated upon task assignment
    """

    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t

    assert type(s.tasks) == set
    assert s._task_count == 1
    assert t in s.tasks
def test_amgr_synchronizer():

    logger = ru.Logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr = Amgr(hostname=hostname, port=port)

    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):

        t = Task()
        t.executable = ['some-executable-%s' % cnt]

        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Start the synchronizer method in a thread
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))

    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING

    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
def generate_pipeline():

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        shuffle(p.stages[CUR_NEW_STAGE:])

    def func_on_false():
        print 'Done'

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE+1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):

            t1 = Task()
            t1.executable = '/bin/sleep'
            t1.arguments = [ '30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
                        'condition': func_condition,
                        'on_true': func_on_true,
                        'on_false': func_on_false
                        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
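
# Hedged sketch of the module-level state the adaptive example above relies on:
# the shuffle import and the CUR_NEW_STAGE / MAX_NEW_STAGE / CUR_TASKS globals.
# The concrete values below are assumptions, not taken from the original example.
from random import shuffle

CUR_NEW_STAGE = 0     # index of the stage most recently adapted
MAX_NEW_STAGE = 4     # upper bound on adaptation steps
CUR_TASKS     = 16    # tasks created per stage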
    def create_pipeline():

        p = Pipeline()

        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['sleep']
        t1.arguments = ['10']

        s.add_tasks(t1)

        p.add_stages(s)

        return p
def test_wfp_enqueue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_enqueue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULED

    assert p.stages[0].state == states.SCHEDULED
    assert p.state == states.SCHEDULING
    def create_pipeline():

        p = Pipeline()

        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        s.add_tasks(t1)

        p.add_stages(s)

        return p
def generate_pipeline():

    def func_condition():

        p.suspend()
        print 'Suspending pipeline %s for 10 seconds' %p.uid
        sleep(10)
        return True

    def func_on_true():

        print 'Resuming pipeline %s' %p.uid
        p.resume()

    def func_on_false():
        pass

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):

        t1 = Task()
        t1.executable = '/bin/sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = ['/bin/sleep']
    t1.arguments = ['300']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = ['mv']
    t1.arguments = ['temp','/tmp/']
    t1.upload_input_data = ['%s/temp'%cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_wfp_initialize_workflow():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    wfp = WFprocessor(sid='test',
                      workflow=[p],
                      pending_queue=list(),
                      completed_queue=list(),
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert p.uid is not None
    assert p.stages[0].uid is not None
    for t in p.stages[0].tasks:
        assert t.uid is not None
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    for x in range(10):
        t1 = Task()
        t1.executable = 'cat'
        t1.arguments = ['file1.txt','file2.txt','>','output.txt']
        t1.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        t1.download_output_data = ['output.txt > %s/output_%s.txt' %(cur_dir,x+1)]

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
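
# Hedged sketch of the driver this example appears to assume: 'cur_dir' holds the
# script directory, and the '$SHARED/file1.txt' / '$SHARED/file2.txt' references
# suggest those files are staged through the AppManager's shared data area. The
# shared_data usage and resource values below are assumptions.
import os

cur_dir = os.path.dirname(os.path.abspath(__file__))

if __name__ == '__main__':

    appman = AppManager(hostname=hostname, port=port)   # assumed defined as above
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus': 2}
    appman.shared_data = ['%s/file1.txt' % cur_dir, '%s/file2.txt' % cur_dir]
    appman.workflow = set([generate_pipeline()])
    appman.run()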
    def func_on_true():

        global CUR_NEW_STAGE

        CUR_NEW_STAGE += 1

        s = Stage()

        for i in range(10):
            t = Task()
            t.executable = '/bin/sleep'
            t.arguments = [ '30']

            s.add_tasks(t)

        # Add post-exec to the Stage
        s.post_exec = {
                        'condition': func_condition,
                        'on_true': func_on_true,
                        'on_false': func_on_false
                    }

        p.add_stages(s)
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/false']
    # t1.arguments = ['"Hello World"','>>','temp.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    for _ in range(4096):
        t1 = Task()
        t1.executable = ['/bin/echo']
        t1.arguments = ['"Hello World"']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_amgr_run_mock():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.name = 'simulation'
    t.executable = ['/bin/date']
    s.tasks = t
    p.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }

    appman = Amgr(hostname=hostname, port=port, rts="mock")
    appman.resource_desc = res_dict

    appman.workflow = [p]
    appman.run()
def test_rp_da_scheduler_bw():
    """
    **Purpose**: Run an EnTK application on localhost
    """

    p1 = Pipeline()
    p1.name = 'p1'

    n = 10

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s' % x
        t.executable = ['/bin/hostname']
        t.arguments = ['>', 'hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 10
        t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt' % (x)]

        s1.add_tasks(t)

    p1.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['/bin/hostname']
        t.arguments = ['>', 'hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt' % (x)]
        t.tag = 't%s' % x

        s2.add_tasks(t)

    p1.add_stages(s2)

    res_dict = {
        'resource': 'ncsa.bw_aprun',
        'walltime': 10,
        'cpus': 128,
        'project': 'gk4',
        'queue': 'high'
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()

    for i in range(n):
        assert open('s1_t%s_hostname.txt' % i,
                    'r').readline().strip() == open('s2_t%s_hostname.txt' % i,
                                                    'r').readline().strip()

    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
def create_windowing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF windowing stage.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: running count of tasks created so far
    :return: list of EnTK Stages and the updated task counter

    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Windowing parameter file directory
    window_process_dir = os.path.join(param_path, "CreateWindows")

    # Window path list
    # Important step! This creates a windowing list prior to having created
    # the actual window path files. It is tested so it definitely works!
    # This way the windowing processes can be distributed for each ASDF file
    # pair on one processor (No MPI support!)

    window_path_list, _ = get_windowing_list(cmt_file_db,
                                             window_process_dir,
                                             verbose=False)

    # Process path function
    window_func = os.path.join(bin_path, "window_selection_asdf.py")

    # The following little workaround gets around the fact that multiple
    # tasks cannot read the same file: create two stages, one for body-wave
    # (or general) entries and one for surface-wave entries.
    bodywave_list = []
    surfacewave_list = []
    for file in window_path_list:
        name = os.path.basename(file)
        if "surface" in name:
            surfacewave_list.append(file)
        else:
            bodywave_list.append(file)

    stage_list = []
    if len(bodywave_list) > 0:
        stage_list.append(bodywave_list)
    if len(surfacewave_list) > 0:
        stage_list.append(surfacewave_list)

    # List of stages
    stages = []

    for window_list in stage_list:
        # Create Process Paths Stage (CPP)
        # Create a Stage object
        window_stage = Stage()
        window_stage.name = "Windowing"

        # Loop over process path files
        for window_path in window_list:

            # Create Task
            window_task = Task()

            # This way the task gets the name of the path file
            window_task.name = os.path.basename(window_path)

            window_task.pre_exec = [  # Conda activate
                DB_params["conda-activate"]
            ]

            window_task.executable = [DB_params["bin-python"]]  # Assign exec
            # to the task

            # Create Argument list
            arguments = [window_func, "-f", window_path]
            if DB_params["verbose"]:
                arguments.append("-v")

            window_task.arguments = arguments

            # In the future maybe to database dir as a total log?
            window_task.stdout = os.path.join(
                "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), window_task.name))

            window_task.stderr = os.path.join(
                "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
                (Cid, str(task_counter).zfill(4), window_task.name))

            window_stage.add_tasks(window_task)

            task_counter += 1

        stages.append(window_stage)

    return stages, task_counter
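
# Hedged usage sketch for create_windowing_stage() above: the returned list of
# stages is appended to a pipeline and the running task counter is threaded
# through, as the return signature suggests. The helper name and its arguments
# are hypothetical, not part of the original example.
def add_windowing_stages(p, cmt_file_db, param_path, task_counter):

    window_stages, task_counter = create_windowing_stage(cmt_file_db,
                                                         param_path,
                                                         task_counter)
    for stage in window_stages:
        p.add_stages(stage)

    return task_counter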
    def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable,
                     ExchangeMethod):
        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """

        self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(Cycle),
                  'r') as f:  # Read exchangePairs.dat
            ExchangeArray = []
            for line in f:
                ExchangeArray.append(int(line.split()[1]))
                #ExchangeArray.append(line)
                #print ExchangeArray

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(Cycle)
        #Bookkeeping
        stage_uids = list()
        task_uids = list()  ## = dict()
        md_dict = dict()

        #Create initial MD stage

        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(Cycle)

        self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

        for r in range(Replicas):
            md_tsk = AMBERTask(cores=Replica_Cores,
                               MD_Executable=MD_Executable)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,
                                                           cycle=Cycle)
            md_tsk.link_input_data = [
                '%s/restrt > inpcrd' %
                (self.Book[Cycle - 1][ExchangeArray[r]]),
                '%s/prmtop' % (self.Book[0][r]),
                #'%s/prmtop'%(self.Tarball_path[0]),
                '%s/mdin_{0}'.format(r) % (self.Book[0][r])
            ]

            #'%s/mdin'%(self.Book[0][r])]
            #'%s/mdin'%(self.Tarball_path[0])]

            md_tsk.arguments = [
                '-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c',
                'inpcrd', '-o', 'out_{0}'.format(r), '-inf',
                'mdinfo_{0}'.format(r)
            ]
            #md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(Cycle + 1)

        #Create Exchange Task
        ex_tsk = Task()
        ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
        ex_tsk.executable = ['python']
        ex_tsk.upload_input_data = [ExchangeMethod]
        for r in range(Replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]

        ex_tsk.arguments = [
            'TempEx.py', '{0}'.format(Replicas), '{0}'.format(Cycle + 1)
        ]
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = [
            'exchangePairs_{0}.dat'.format(Cycle + 1)
        ]  # Finds exchange partners, also  Generates exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.Book.append(md_dict)
        #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid)
        #print d
        #print self.Book
        return q
def main(cmt_filename):
    '''This tiny function runs the full pipeline for a given CMT solution.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      supposed to be inverted for

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>

    '''

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters
    # Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)
    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    t1.executable = [DB_params['bin-python']]  # Assign executable to the task
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # # ---- REQUEST DATA ----------------------------------------------------- #
    #
    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]  # Assign executable
    #                                                       # to the task
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    w_sources_t.executable = [DB_params['bin-python']]  # Assign executable
    # to the task
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ----------------------------------------------------- #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/"
        "CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    for at in attr[:1]:  # run only the first simulation attribute ("CMT")
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            'module purge'
        ]
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Create a dictionary describe four mandatory keys:
    # resource, walltime, and cpus
    # resource is 'local.localhost' to execute locally
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application Manager
    # Note: The list order is not guaranteed to be preserved
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
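
# Hedged entry point matching the usage stated in main()'s docstring
# ("python pipeline <path/to/cmtsolution>"); the argument handling is an assumption.
if __name__ == '__main__':

    import sys
    main(sys.argv[1])   # path to the CMTSOLUTION file to invert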
    Replicas = 24
    Replica_Cores = 1

    Pilot_Cores = Replicas * Replica_Cores

    
    for N_Stg in range(Stages):
        stg =  Stage() ## initialization
        task_uids['Stage_%s'%N_Stg] = list()

        #####Initial MD stage  

        if N_Stg == 0:
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine  
                t.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)] 
                t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] 
                t.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out']
                t.cores = Replica_Cores
                stg.add_tasks(t)
                task_uids['Stage_%s'%N_Stg].append(t.uid)
            p.add_stages(stg)
            stage_uids.append(stg.uid) 



        #####Exchange Stages    
        elif N_Stg != 0 and N_Stg % 2 == 1:
            t = Task()
            t.executable = ['python']
def test_wfp_dequeue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL
    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED
    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(wfp, ))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.DONE

    assert p.stages[0].state == states.DONE
    assert p.state == states.DONE
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat","r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray
                

    q = Pipeline()
    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    md_dict = dict()


    #Create initial MD stage


    md_stg = Stage()
    for r in range (Replicas):
        md_tsk                 = Task()
        md_tsk.executable      = [MD_Executable]  #MD Engine, Blue Waters
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[Cycles-1][ExchangeArray[r]]),
                                  '%s/prmtop'%(Book[Cycles-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
                                  '%s/mdin'%(Book[Cycles-1][r])]

        md_tsk.pre_exec        = ['export AMBERHOME=$HOME/amber/amber14/'] # Should be abstracted from user?
        #md_tsk.pre_exec       = ['module load amber']
        #md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores           = Replica_Cores
        md_tsk.mpi             = True
        md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)

        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)
             
                                                                                         
                                                                                          
    ex_stg= Stage()
    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range (Replicas):
        #print d[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[n1],n1)]

    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
        #print d
        #print Book
    return q

#p = InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod)
#q = Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod)

#return (p, q)
                                                                            
for i in range(ntasks):
    t = Task()
    t.cpu_reqs = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 4,
        'thread_type': 'OpenMP'
    }
    t.gpu_reqs = {
        'processes': 0,
        'process_type': None,
        'threads_per_process': 0,
        'thread_type': None
    }
    t.executable = PYTHON
    t.arguments = [
        f'{current_dir}/simulation.py', f'{run_dir}/simulations/all',
        ADIOS_XML, i, aggregator_dir
    ]
    s.add_tasks(t)

t = Task()
t.cpu_reqs = {
    'processes': 1,
    'process_type': None,
    'threads_per_process': 4,
    'thread_type': 'OpenMP'
}
t.gpu_reqs = {
    'processes': 0,
def generate_pipeline():

    global CUR_TASKS, CUR_CORES, duration, MAX_NEW_STAGE

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE < MAX_NEW_STAGE - 1:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        for t in p.stages[CUR_NEW_STAGE].tasks:
            cores = randint(1, 20)
            t.arguments = ['-c', str(cores), '-t', str(duration)]

    def func_on_false():
        print 'Done'

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):

            t1 = Task()
            t1.pre_exec = [
                'export PATH=/u/sciteam/balasubr/modules/stress-ng-0.09.34:$PATH'
            ]
            t1.executable = ['stress-ng']
            t1.arguments = ['-c', str(CUR_CORES), '-t', str(duration)]
            t1.cpu_reqs = {
                'processes': 1,
                'process_type': '',
                'threads_per_process': CUR_CORES,
                'thread_type': ''
            }

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # List to hold uids of Tasks of Stage 1
    s1_task_uids = list()

    for cnt in range(10):

        # Create a Task object
        t = Task()
        t.executable = '/bin/echo'  # Assign executable to the task
        t.arguments = ['I am task %s in %s' % (cnt, s1.name)]  # Assign arguments for the task executable

        # Add the Task to the Stage
        s1.add_tasks(t)

        # Add Task uid to list
        s1_task_uids.append(t.uid)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object
    s2 = Stage()
    def generate_pipeline(self):

        pipeline = Pipeline()

        # generate replicas
        # create a wrapper task that assigns the values of replica_i and replica_j
        # =================

        stage_1 = Stage() 

        for _gibbs_step in range(self.n_gibbs_steps): 

            task = Task() # assign replica_i and replica_j
            task.name = 'assign_replica_numbers'

            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s'%_gibbs_step]
            stage_1.add_tasks(task)


        pipeline.add_stages(stage_1)

        # replica exchange Metropolis criteria
        # invoke repex from RepEx 3.0 
        # =================

        stage_2 = Stage()

        for _gibbs_step in range(self.n_gibbs_steps): 
        
            task = Task()
            task.name = 'repex'

            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s'%_gibbs_step]
            stage_2.add_tasks(task)

        pipeline.add_stages(stage_2)

        # rotation (MC) 
        # =================

        stage_3 = Stage()

        for replica in range(self.number_of_replicas):

            task = Task()
            task.name = 'rotation'

            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s'%replica]
            stage_3.add_tasks(task)

        pipeline.add_stages(stage_3)

        # translation (MC)
        # =================

        stage_4 = Stage()

        
        for replica in range(self.number_of_replicas):

            task = Task()
            task.name = 'translation'

            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s'%replica]
            stage_4.add_tasks(task)

        pipeline.add_stages(stage_4)

        # propagation (MC)
        # =================

        stage_5 = Stage()

        for replica in range(self.number_of_replicas):

            task = Task()
            task.name = 'propagation'

            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s'%replica]
            stage_5.add_tasks(task)

        pipeline.add_stages(stage_5)

        # energy matrix 
        # for every replica pull the sampler state
        # compute the energy matrix of each thermo state in thermo_matrix, given that replica's sampler state
        # =================

        stage_6 = Stage()

        for replica in range(self.number_of_replicas):

            for thermo_state in range(self.thermo_state):

                task = Task()
                task.name = 'energy_matrix'

                task.executable = [NULL]
                task.cores = self.cores
                task.arguments = ['I am task %s'%replica]
                stage_6.add_tasks(task)

        pipeline.add_stages(stage_6)


        print 'TIES pipeline has', len(pipeline.stages), 'stages. Tasks counts:', [len(s.tasks) for s in pipeline.stages]
        return pipeline
    def generate_MD_stage(num_MD=1):
        """
        Function to generate MD stage. 
        """
        s1 = Stage()
        s1.name = 'MD'
        initial_MD = True
        outlier_filepath = '%s/restart_points.json' % outlier_path

        if os.path.exists(outlier_filepath):
            initial_MD = False
            outlier_file = open(outlier_filepath, 'r')
            outlier_list = json.load(outlier_file)
            outlier_file.close()

        # MD tasks
        time_stamp = int(time.time())
        for i in range(num_MD):
            t1 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
            t1.pre_exec = [
                '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
            ]
            t1.pre_exec += ['module load cuda/9.1.85']
            t1.pre_exec += ['conda activate %s' % conda_path]
            t1.pre_exec += [
                'export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path
            ]
            t1.pre_exec += ['cd %s' % md_path]
            t1.pre_exec += [
                'mkdir -p omm_runs_%d && cd omm_runs_%d' %
                (time_stamp + i, time_stamp + i)
            ]
            t1.executable = ['%s/bin/python' % conda_path]  # run_openmm.py
            t1.arguments = ['%s/run_openmm.py' % md_path]
            if top_file:
                t1.arguments += ['--topol', top_file]

            # pick initial point of simulation
            if initial_MD or i >= len(outlier_list):
                t1.arguments += ['--pdb_file', pdb_file]
#                 t1.arguments += ['--length', LEN_initial]
#                print "Running from initial frame for %d ns. " % LEN_initial
            elif outlier_list[i].endswith('pdb'):
                t1.arguments += ['--pdb_file', outlier_list[i]]
                #                 t1.arguments += ['--length', LEN_iter]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]
#                print "Running from outlier %s for %d ns" % (outlier_list[i], LEN_iter)
            elif outlier_list[i].endswith('chk'):
                t1.arguments += ['--pdb_file', pdb_file, '-c', outlier_list[i]]
                #                 t1.arguments += ['--length', LEN_iter]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]
#                print "Running from checkpoint %s for %d ns" % (outlier_list[i], LEN_iter)

# how long to run the simulation
            if initial_MD:
                t1.arguments += ['--length', LEN_initial]
            else:
                t1.arguments += ['--length', LEN_iter]

            # assign hardware the task
            t1.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t1.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the MD task to the simulating stage
            s1.add_tasks(t1)
        return s1
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        s3 = Stage()
        s3.name = 'learning'

        # learn task
        time_stamp = int(time.time())
        for i in range(num_ML):
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            #t3.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            #t3.pre_exec += ['module load cuda/9.1.85']
            #t3.pre_exec += ['conda activate %s' % conda_path]

            #t3.pre_exec += ['module unload python']
            #t3.pre_exec += ['. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            #        'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            #        'conda deactivate',
            #        'conda deactivate',
            #        'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2']
            #t3.pre_exec += ['module load ibm-wml-ce']
            t3.pre_exec += [
                'module unload prrte', 'module unload python',
                'module load xl', 'module load xalt',
                'module load spectrum-mpi', 'module load cuda', 'module list'
            ]
            t3.pre_exec += [
                'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path
            ]
            t3.pre_exec += ['cd %s' % cvae_path]
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp + i)
            t3.pre_exec += [
                'mkdir -p {0}/benchmarks && cd {0}'.format(cvae_dir)
            ]
            #t3.pre_exec += ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/ddlrun /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s' % (cvae_path, dim)]
            t3.pre_exec += ['module load ibm-wml-ce', 'env']
            t3.pre_exec += [
                '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
                'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
                'conda deactivate', 'conda deactivate',
                'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2'
            ]
            t3.pre_exec += ['export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5"']
            t3.pre_exec += [
                'jsrun --erf_input /gpfs/alpine/med110/scratch/atrifan2/covid19/PLPro/entk_cvae_md_hvd/RANKFILE /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s'
                % (cvae_path, dim)
            ]
            #t3.pre_exec += ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/ddlrun /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s' % (cvae_path, dim)]
            t3.executable = [
                'date'
            ]  #t3.executable = ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python']
            #t3.arguments = [ '%s/cvae/train_cvae.py' % (cvae_path),
            #        '-f', '../bytes-train.tfrecords',
            #        '--dim', dim]
            #t3.executable = ['%s/bin/python' % conda_path]  # train_cvae.py
            #t3.arguments = ['%s/train_cvae.py' % cvae_path,
            #        '--h5_file', '%s/cvae_input.h5' % agg_path,
            #        '--dim', dim]

            t3.cpu_reqs = {
                'processes': 6,
                'process_type': 'MPI',
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t3.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the learn task to the learning stage
            s3.add_tasks(t3)

        return s3
    def InitCycle(
        self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps
    ):  # "Cycle" = 1 MD stage plus the subsequent exchange computation
        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first Cycle
        """

        #Initialize Pipeline
        #self._prof.prof('InitTar', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict = dict()  #Bookkeeping
        tar_dict = dict()  #Bookkeeping

        ##Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

        writeInputs.writeInputs(max_temp=350,
                                min_temp=250,
                                replicas=Replicas,
                                timesteps=timesteps)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("Input_Files.tar", "w")
        for name in ["prmtop", "inpcrd", "mdin"]:
            tar.add(name)
        for r in range(Replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range(Replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

        #Create Untar Stage

        untar_stg = Stage()
        untar_stg.name = 'untarStg'

        #Untar Task

        untar_tsk = Task()
        untar_tsk.name = 'untartsk'
        untar_tsk.executable = ['python']

        untar_tsk.upload_input_data = [
            'untar_input_files.py', 'Input_Files.tar'
        ]
        untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
        untar_tsk.cores = 1

        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, untar_stg.name, untar_tsk.name)

        # First MD stage: needs to be defined separately since workflow is not built from a predetermined order

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)

        # MD tasks

        for r in range(Replicas):

            md_tsk = AMBERTask(cores=Replica_Cores,
                               MD_Executable=md_executable)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
            md_tsk.link_input_data += [
                '%s/inpcrd' % tar_dict[0],
                '%s/prmtop' % tar_dict[0],
                '%s/mdin_{0}'.format(r) %
                tar_dict[0]  #Use for full temperature exchange
                #'%s/mdin'%tar_dict[0]  #Testing only
            ]
            md_tsk.arguments = [
                '-O',
                '-p',
                'prmtop',
                '-i',
                'mdin_{0}'.format(r),  # Use this for full Temperature Exchange
                '-c',
                'inpcrd',
                '-o',
                'out_{0}'.format(r),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
            #print md_tsk.uid
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)

        # First Exchange Stage

        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)
        #with open('logfile.log', 'a') as logfile:
        #   logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')
        # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
        # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs.
        # Said pairs then exchange configurations by linking output configuration files appropriately.

        ex_tsk = Task()
        ex_tsk.name = 'extsk0'
        ex_tsk.executable = ['python']
        ex_tsk.upload_input_data = [ExchangeMethod]
        for r in range(Replicas):
            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.Book.append(md_dict)
        return p
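
The exchange task above only uploads the ExchangeMethod script (TempEx.py in the arguments) and downloads exchangePairs_0.dat; the script itself is not part of this listing. The following is a minimal sketch of what such a script could look like, assuming the replica energies can be read from the mdinfo files and using a placeholder acceptance rule rather than the real temperature-dependent Metropolis criterion.

import random
import sys


def read_energy(path):
    # Assumption: the final potential energy can be pulled out of an AMBER
    # mdinfo file; the value after the last '=' on the EPtot line is used
    # as a stand-in here.
    with open(path) as f:
        for line in f:
            if 'EPtot' in line:
                return float(line.split('=')[-1].split()[0])
    return 0.0


if __name__ == '__main__':

    replicas = int(sys.argv[1])                          # number of replicas
    cycle = sys.argv[2] if len(sys.argv) > 2 else '0'    # current cycle index

    energies = [read_energy('mdinfo_%s' % r) for r in range(replicas)]

    # Toy sweep over neighbouring replicas: swap a pair if the energy change
    # is favourable or a coin flip accepts it. A real exchange script would
    # use the replica temperatures in the acceptance probability.
    partners = list(range(replicas))
    for r in range(replicas - 1):
        delta = energies[r + 1] - energies[r]
        if delta < 0 or random.random() < 0.5:
            partners[r], partners[r + 1] = partners[r + 1], partners[r]

    # The workflow downloads this file and uses it to link output
    # configurations between exchanging replicas.
    with open('exchangePairs_%s.dat' % cycle, 'w') as out:
        for r in partners:
            out.write('%s\n' % r)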
Exemplo n.º 45
    def generate_aggregating_stage():
        """
        Function to concatenate the MD trajectory (h5 contact map)
        """
        s2 = Stage()
        s2.name = 'S2.aggregating'

        # Aggregation task
        t2 = Task()

        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
        t2.pre_exec = [
            '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
            'conda activate %s' % cfg['conda_pytorch'],
            'export LANG=en_US.utf-8', 'export LC_ALL=en_US.utf-8'
        ]
        # Pre-processing for the molecules script, which needs all files in a
        # single directory. The following pre_exec commands:
        # 1) find all .dcd files from the OpenMM results
        # 2) create a temp directory
        # 3) symlink them into the temp directory
        # (a rough Python equivalent is sketched after this function)
        t2.pre_exec += [
            'export dcd_list=(`ls %s/MD_exps/%s/omm_runs_*/*dcd`)' %
            (cfg['base_path'], cfg['system_name']),
            'export tmp_path=`mktemp -p %s/MD_to_CVAE/ -d`' % cfg['base_path'],
            'for dcd in ${dcd_list[@]}; do tmp=$(basename $(dirname $dcd)); ln -s $dcd $tmp_path/$tmp.dcd; done',
            'ln -s %s $tmp_path/prot.pdb' % cfg['pdb_file'], 'ls ${tmp_path}'
        ]

        t2.pre_exec += [
            'unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4'
        ]

        # - Each node takes 6 ranks
        # - each rank processes 2 files
        # - each iteration accumulates files to process
        cnt_constraint = min(cfg['node_counts'] * 6,
                             cfg['md_counts'] * max(1, CUR_STAGE) // 2)
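        # Worked example with illustrative numbers (not taken from an actual
        # config): node_counts=2, md_counts=12 and CUR_STAGE=1 give
        # min(2 * 6, 12 * 1 // 2) = min(12, 6) = 6 aggregation ranks.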

        t2.executable = ['%s/bin/python' % cfg['conda_pytorch']]  # MD_to_CVAE.py
        t2.arguments = [
            '%s/scripts/traj_to_dset.py' % cfg['molecules_path'], '-t',
            '$tmp_path', '-p',
            '%s/Parameters/input_protein/prot.pdb' % cfg['base_path'], '-r',
            '%s/Parameters/input_protein/prot.pdb' % cfg['base_path'], '-o',
            '%s/MD_to_CVAE/cvae_input.h5' % cfg['base_path'],
            '--contact_maps_parameters',
            "kernel_type=threshold,threshold=%s" % cfg['cutoff'], '-s',
            cfg['selection'], '--rmsd', '--fnc', '--contact_map',
            '--point_cloud', '--num_workers', 2, '--distributed', '--verbose'
        ]

        # Add the aggregation task to the aggregating stage
        t2.cpu_reqs = {
            'processes': 1 * cnt_constraint,
            'process_type': "MPI",
            'threads_per_process': 6 * 4,
            'thread_type': 'OpenMP'
        }

        s2.add_tasks(t2)
        return s2
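
The bash one-liners in t2.pre_exec above perform the staging described in the comments. As a reading aid only, a rough Python equivalent of that staging step is sketched below; the cfg keys and directory layout mirror the ones used above and are otherwise assumptions.

import glob
import os
import tempfile


def stage_dcd_files(cfg):
    # 1) find all .dcd trajectories produced by the OpenMM runs
    dcd_list = glob.glob('%s/MD_exps/%s/omm_runs_*/*dcd'
                         % (cfg['base_path'], cfg['system_name']))

    # 2) create a temporary directory under MD_to_CVAE
    tmp_path = tempfile.mkdtemp(dir='%s/MD_to_CVAE' % cfg['base_path'])

    # 3) symlink every trajectory (named after its run directory) plus the PDB
    for dcd in dcd_list:
        run_name = os.path.basename(os.path.dirname(dcd))
        os.symlink(dcd, os.path.join(tmp_path, run_name + '.dcd'))
    os.symlink(cfg['pdb_file'], os.path.join(tmp_path, 'prot.pdb'))

    return tmp_path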
Exemplo n.º 46
def get_pipeline(instance, iterations):

    # Create a Pipeline object
    p = Pipeline()

    # Create Stage 1
    s1 = Stage()

    # Create a Task
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py', '--template', 'CB7G3_template.mdp', '--newname',
        'CB7G3_run.mdp', '--wldelta', '2', '--equilibrated', 'False',
        '--lambda_state', '0', '--seed',
        '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp', '$SHARED/analysis_1.py'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Create Stage 2
        s2 = Stage()

        # Create a Task
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp', '-c', 'CB7G3.gro', '-p', 'CB7G3.top', '-n',
            'CB7G3.ndx', '-o', 'CB7G3.tpr', '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx', '$SHARED/CB7G3.top', '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp', '$SHARED/cucurbit_7_uril_GMX.itp'
        ]

        if it == 1:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s1.uid, t1.uid), '$SHARED/CB7G3.gro'
            ]
        else:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                (p.uid, s3.uid, t3.uid)
            ]

        # Add the Task to the Stage
        s2.add_tasks(t2)

        # Add Stage to the Pipeline
        p.add_stages(s2)

        # Create Stage 3
        s3 = Stage()

        # Create a Task
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt',
            20,
            '-deffnm',
            'CB7G3',
            '-dhdl',
            'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)
        ]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]

        # Add the Task to the Stage
        s3.add_tasks(t3)

        # Add Stage to the Pipeline
        p.add_stages(s3)

        # Create Stage 4
        s4 = Stage()

        # Create a Task
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        t4.arguments = [
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            #'--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it),
            '--run={0}'.format(instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(
                it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(
                it, instance)
        ]

        # Add the Task to the Stage
        s4.add_tasks(t4)

        # Add Stage to the Pipeline
        p.add_stages(s4)

    return p
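
The function above only builds the pipeline; submitting it follows the AppManager pattern used elsewhere in these examples. The snippet below is a sketch under assumed settings: the resource name, walltime, core count and the list of $SHARED files are illustrative, not taken from the original run script.

import os

from radical.entk import AppManager

if __name__ == '__main__':

    appman = AppManager(hostname=os.environ.get('RMQ_HOSTNAME', 'localhost'),
                        port=int(os.environ.get('RMQ_PORT', 5672)))

    # Assumed resource request; adjust to the target machine.
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 60,
                            'cpus': 20}

    # Files referenced via $SHARED by the tasks above.
    appman.shared_data = ['./CB7G3_template.mdp', './analysis_1.py',
                          './analysis_2.py', './alchemical_analysis.py',
                          './CB7G3.gro', './CB7G3.top', './CB7G3.ndx',
                          './3atomtypes.itp', './3_GMX.itp',
                          './cucurbit_7_uril_GMX.itp']

    appman.workflow = set([get_pipeline(instance=1, iterations=1)])
    appman.run()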
Exemplo n.º 47
def test_task_to_dict():
    '''
    **Purpose**: Test if the 'to_dict' function of Task class converts all
                 expected attributes of the Task into a dictionary
    '''

    t = Task()
    d = t.to_dict()

    assert d == {'uid'                  : 'task.0000',
                 'name'                 : '',
                 'state'                : states.INITIAL,
                 'state_history'        : [states.INITIAL],
                 'pre_exec'             : [],
                 'executable'           : '',
                 'arguments'            : [],
                 'post_exec'            : [],
                 'cpu_reqs'             : {'processes'           : 1,
                                           'process_type'        : None,
                                           'threads_per_process' : 1,
                                           'thread_type'         : None},
                 'gpu_reqs'             : {'processes'           : 0,
                                           'process_type'        : None,
                                           'threads_per_process' : 0,
                                           'thread_type'         : None},
                 'lfs_per_process'      : 0,
                 'upload_input_data'    : [],
                 'copy_input_data'      : [],
                 'link_input_data'      : [],
                 'link_output_data'     : [],
                 'move_input_data'      : [],
                 'copy_output_data'     : [],
                 'move_output_data'     : [],
                 'download_output_data' : [],
                 'sandbox'              : '',
                 'stdout'               : '',
                 'stderr'               : '',
                 'exit_code'            : None,
                 'path'                 : None,
                 'tag'                  : None,
                 'parent_stage'         : {'uid' : None, 'name' : None},
                 'parent_pipeline'      : {'uid' : None, 'name' : None}}


    t                                 = Task()
    t.uid                             = 'test.0017'
    t.name                            = 'new'
    t.pre_exec                        = ['module load abc']
    t.executable                      = ['sleep']
    t.arguments                       = ['10']
    t.cpu_reqs['processes']           = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes']           = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process                 = 1024
    t.upload_input_data               = ['test1']
    t.copy_input_data                 = ['test2']
    t.link_input_data                 = ['test3']
    t.move_input_data                 = ['test4']
    t.copy_output_data                = ['test5']
    t.move_output_data                = ['test6']
    t.download_output_data            = ['test7']
    t.stdout                          = 'out'
    t.stderr                          = 'err'
    t.exit_code                       = 1
    t.path                            = 'a/b/c'
    t.tag                             = 'task.0010'
    t.parent_stage                    = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline                 = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {'uid'                  : 'test.0017',
                 'name'                 : 'new',
                 'state'                : states.INITIAL,
                 'state_history'        : [states.INITIAL],
                 'pre_exec'             : ['module load abc'],
                 'executable'           : 'sleep',
                 'arguments'            : ['10'],
                 'post_exec'            : [],
                 'cpu_reqs'             : {'processes'           : 10,
                                           'process_type'        : None,
                                           'threads_per_process' : 2,
                                           'thread_type'         : None},
                 'gpu_reqs'             : {'processes'           : 5,
                                           'process_type'        : None,
                                           'threads_per_process' : 3,
                                           'thread_type'         : None},
                 'lfs_per_process'      : 1024,
                 'upload_input_data'    : ['test1'],
                 'copy_input_data'      : ['test2'],
                 'link_input_data'      : ['test3'],
                 'link_output_data'     : [],
                 'move_input_data'      : ['test4'],
                 'copy_output_data'     : ['test5'],
                 'move_output_data'     : ['test6'],
                 'download_output_data' : ['test7'],
                 'sandbox'              : '',
                 'stdout'               : 'out',
                 'stderr'               : 'err',
                 'exit_code'            : 1,
                 'path'                 : 'a/b/c',
                 'tag'                  : 'task.0010',
                 'parent_stage'         : {'uid': 's1', 'name' : 'stage1'},
                 'parent_pipeline'      : {'uid': 'p1', 'name' : 'pipeline1'}}


    t.executable = 'sleep'
    d = t.to_dict()

    assert d == {'uid'                  : 'test.0017',
                 'name'                 : 'new',
                 'state'                : states.INITIAL,
                 'state_history'        : [states.INITIAL],
                 'pre_exec'             : ['module load abc'],
                 'executable'           : 'sleep',
                 'arguments'            : ['10'],
                 'post_exec'            : [],
                 'cpu_reqs'             : {'processes'           : 10,
                                           'process_type'        : None,
                                           'threads_per_process' : 2,
                                           'thread_type'         : None},
                 'gpu_reqs'             : {'processes'           : 5,
                                           'process_type'        : None,
                                           'threads_per_process' : 3,
                                           'thread_type'         : None},
                 'lfs_per_process'      : 1024,
                 'upload_input_data'    : ['test1'],
                 'copy_input_data'      : ['test2'],
                 'link_input_data'      : ['test3'],
                 'link_output_data'     : [],
                 'move_input_data'      : ['test4'],
                 'copy_output_data'     : ['test5'],
                 'move_output_data'     : ['test6'],
                 'download_output_data' : ['test7'],
                 'sandbox'              : '',
                 'stdout'               : 'out',
                 'stderr'               : 'err',
                 'exit_code'            : 1,
                 'path'                 : 'a/b/c',
                 'tag'                  : 'task.0010',
                 'parent_stage'         : {'uid': 's1', 'name' : 'stage1'},
                 'parent_pipeline'      : {'uid': 'p1', 'name' : 'pipeline1'}}
Exemplo n.º 48
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):

    # Initialize a task object
    t = Task()

    # Define magic variable dictionary
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # pre_exec lets you load modules and set up the environment before executing the workload
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # If there's a user-defined input file (likely for genmod modules), add it to the
    # options list and upload file list if needed
    if "input_data_file" in tcfg['options']:
        tcfg['upload_input_data'].append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task
    t.cpu_reqs = {
        'processes': tcfg['cpu']['processes'],
        'process_type': tcfg['cpu']['process-type'],
        'threads_per_process': tcfg['cpu']['threads-per-process'],
        'thread_type': tcfg['cpu']['thread-type'],
    }

    # Upload data from your local machine to the remote machine
    # Note: Remote machine can be the local machine
    t.upload_input_data = tcfg['upload_input_data']

    # Copy data from other stages/tasks for use in this task
    copy_list = []
    if "copy_input_data" in tcfg.keys():
        for copy_stage in tcfg['copy_input_data'].keys():
            for copy_task in tcfg['copy_input_data'][copy_stage].keys():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend([
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in tcfg['copy_input_data'][copy_stage][copy_task]
                ])

    # Append the copy list (if any) to the task object
    t.copy_input_data = copy_list

    # Set the download data for the task
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg.keys():
        download_list.extend([
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data']
        ])

    # Append the download list to this task
    t.download_output_data = download_list

    # Return the task object
    return (t)
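
GenerateTask above depends on two helpers that are not part of this snippet: match_options, which maps task and experiment options onto command-line arguments, and mvar_replace_dict, which substitutes 'magic variables' such as PIPELINE_ID into file names. A guess at what the latter might look like is sketched below; the %VAR% marker syntax is an assumption, not the actual implementation.

def mvar_replace_dict(mvar_dict, instring):
    # Replace every magic-variable marker, e.g. "%PIPELINE_ID%", with its
    # value from mvar_dict and return the resulting string.
    outstring = instring
    for key, value in mvar_dict.items():
        outstring = outstring.replace('%{0}%'.format(key), str(value))
    return outstring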
Exemplo n.º 49
if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # List to hold uids of Tasks of Stage 1
    s1_task_uids = list()

    for cnt in range(10):

        # Create a Task object
        t = Task()
        t.executable = ['/bin/echo']   # Assign executable to the task
        t.arguments = ['I am task %s in %s'%(cnt, s1.name)]  # Assign arguments for the task executable

        # Add the Task to the Stage
        s1.add_tasks(t)

        # Add Task uid to list
        s1_task_uids.append(t.uid)

    # Add Stage to the Pipeline
    p.add_stages(s1)


    # Create another Stage object
    s2 = Stage()
Exemplo n.º 50
def test_task_exceptions(s, l, i, b):
    '''
    **Purpose**: Test if all attribute assignments raise exceptions
                 for invalid values
    '''

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        # special case due to backward compatibility
        if not isinstance(data, str) and \
           not isinstance(data, list):

            with pytest.raises(ree.TypeError): t.executable = data


        if not isinstance(data, str):

            with pytest.raises(ree.TypeError): t.name                 = data
            with pytest.raises(ree.TypeError): t.path                 = data
            with pytest.raises(ree.TypeError): t.parent_stage         = data
            with pytest.raises(ree.TypeError): t.parent_pipeline      = data
            with pytest.raises(ree.TypeError): t.stdout               = data
            with pytest.raises(ree.TypeError): t.stderr               = data


        if not isinstance(data, list):

            with pytest.raises(ree.TypeError): t.pre_exec             = data
            with pytest.raises(ree.TypeError): t.arguments            = data
            with pytest.raises(ree.TypeError): t.post_exec            = data
            with pytest.raises(ree.TypeError): t.upload_input_data    = data
            with pytest.raises(ree.TypeError): t.copy_input_data      = data
            with pytest.raises(ree.TypeError): t.link_input_data      = data
            with pytest.raises(ree.TypeError): t.move_input_data      = data
            with pytest.raises(ree.TypeError): t.copy_output_data     = data
            with pytest.raises(ree.TypeError): t.download_output_data = data
            with pytest.raises(ree.TypeError): t.move_output_data     = data

        if not isinstance(data, str):

            with pytest.raises(ree.ValueError):
                t.cpu_reqs = {'processes'          : 1,
                              'process_type'       : data,
                              'threads_per_process': 1,
                              'thread_type'        : None}
                t.cpu_reqs = {'processes'          : 1,
                              'process_type'       : None,
                              'threads_per_process': 1,
                              'thread_type'        : data
                            }
                t.gpu_reqs = {'processes'          : 1,
                              'process_type'       : data,
                              'threads_per_process': 1,
                              'thread_type'        : None
                            }
                t.gpu_reqs = {'processes'          : 1,
                              'process_type'       : None,
                              'threads_per_process': 1,
                              'thread_type'        : data}

        if not isinstance(data, int):

            with pytest.raises(ree.TypeError):
                t.cpu_reqs = {'processes'           : data,
                              'process_type'        : None,
                              'threads_per_process' : 1,
                              'thread_type'         : None}

            with pytest.raises(ree.TypeError):
                t.cpu_reqs = {'processes'           : 1,
                              'process_type'        : None,
                              'threads_per_process' : data,
                              'thread_type'         : None}

            with pytest.raises(ree.TypeError):
                t.gpu_reqs = {'processes'           : data,
                              'process_type'        : None,
                              'threads_per_process' : 1,
                              'thread_type'         : None}

            with pytest.raises(ree.TypeError):
                t.gpu_reqs = {'processes'           : 1,
                              'process_type'        : None,
                              'threads_per_process' : data,
                              'thread_type'         : None}
Exemplo n.º 51
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod):     # "Cycle" = 1 MD stage plus the subsequent exchange computation

    #Initialize Pipeline
    p = Pipeline()

    md_dict    = dict() #Bookkeeping
    tar_dict   = dict() #Bookkeeping


    #Create Tarball of input data

        


    #Create Untar Stage
    untar_stg = Stage()
    #Untar Task
    untar_tsk                   = Task()
    untar_tsk.executable        = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py','../../Input_Files.tar']
    untar_tsk.arguments         = ['untar_input_files.py','Input_Files.tar']
    untar_tsk.cores             = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)


    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid,
                                                   untar_stg.uid,
                                                   untar_tsk.uid)
    print(tar_dict[0])
    # First MD stage: needs to be defined separately since workflow is not built from a predetermined order
    md_stg = Stage()


    # MD tasks

    for r in range(Replicas):
        md_tsk                  = Task()
        md_tsk.executable       = [MD_Executable]
        md_tsk.link_input_data += ['%s/inpcrd'%tar_dict[0],
                                   '%s/prmtop'%tar_dict[0],
                                   #'%s/mdin_{0}'.format(r)%tar_dict[0]
                                   '%s/mdin'%tar_dict[0] 
                                   ] 
        md_tsk.pre_exec         = ['export AMBERHOME=$HOME/amber/amber14/'] #Should be abstracted from the user?
        md_tsk.arguments        = ['-O','-p','prmtop', '-i', 'mdin',               #'mdin_{0}'.format(r), # Use this for full Temperature Exchange
                                   '-c','inpcrd','-o','out_{0}'.format(r),
                                   '-inf','mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, md_stg.uid, md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)
                                                

    # First Exchange Stage
    ex_stg = Stage()

    # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
    # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs.
    # Said pairs then exchange configurations by linking output configuration files appropriately.

    ex_tsk                      = Task()
    ex_tsk.executable           = ['python']
    #ex_tsk.upload_input_data    = ['exchangeMethods/TempEx.py']
    ex_tsk.upload_input_data    = [ExchangeMethod]  
    for r in range(Replicas):
        ex_tsk.link_input_data     += ['%s/mdinfo_%s'%(md_dict[r],r)]
    ex_tsk.arguments            = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores                = 1
    ex_tsk.mpi                  = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
    #print Book
    return p
Exemplo n.º 52
# If RabbitMQ runs in a Docker container or a separate VM, set "RMQ_HOSTNAME"
# and "RMQ_PORT" in the session where you are running this script.
hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
port = os.environ.get('RMQ_PORT', 5672)

if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = [
        '-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object
    s2 = Stage()
    s2.name = 'Stage 2'

    # Create a Task object
Exemplo n.º 53
        # Create file structure
        'mkdir -p DATABASES_MPI',
        'mkdir -p OUTPUT_FILES',

        # Copy data
        'cp -r /projects/TROMP/entk/specfem3d_globe/DATA .',

        # Copy input files to output directory
        'cp DATA/Par_file OUTPUT_FILES/',
        'cp DATA/CMTSOLUTION OUTPUT_FILES/',
        'cp DATA/STATIONS OUTPUT_FILES',

        # Link binaries
        'ln -s /projects/TROMP/entk/specfem3d_globe/bin .',
    ]
    t1.executable = ['./bin/xmeshfem3D']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }

    t1.post_exec = [  # Tar output files
        'tar -zcf specfem_data.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES',

        # Copy to scratch folder
        'cp specfem_data.tar.gz /projects/TROMP/entk/scratch/',
    ]
    t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_data.tar.gz']
Exemplo n.º 54
    p = Pipeline()

    # Second stage to perform one specfem task
    specfem_stage = Stage()

    t1 = Task()

    t1.pre_exec = [  # Modules to be loaded
        'module purge',
        'module load intel/18.0',
        'module load intel-mpi/intel/2018.3',

        # Untar the input data
        'tar -zxf specfem_data.tar.gz',
    ]
    t1.executable = ['./bin/xspecfem3D']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.copy_input_data = ['/projects/TROMP/entk/scratch/specfem_data.tar.gz']
    t1.post_exec = [  # Tar output files
        'tar -zcf specfem_final.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES',

        # Copy to scratch folder
        'cp specfem_final.tar.gz /projects/TROMP/entk/scratch/',
    ]
    t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_final.tar.gz']
Exemplo n.º 55
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #for non-zero iterations it is assumed that the files are already in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  #'/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    #if cur_iter==0:
    #	restart_iter=0
    #else:
    #	restart_iter=cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference)
        ]

        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

        # ------------------------------------------------------------------------------------------------------------------

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        def_rep_per_thread = int(np.ceil(num_replicas / num_parallel))
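        # Example with illustrative numbers: num_replicas=10 and num_parallel=4
        # give ceil(10/4) = 3 replicas per task, so the loop below creates
        # tasks covering 3, 3, 3 and 1 replicas respectively.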
        num_allocated_rep = 0
        num_used_threads = 0
        while (num_allocated_rep < num_replicas):
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = [
                'module swap PrgEnv-cray PrgEnv-gnu', 'module load bwpy/1.2.5',
                'module add bwpy-mpi', 'module add fftw/3.3.4.10',
                'module add cray-netcdf',
                'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
                'module add cmake/3.1.3', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
                'export tasks=md',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'True', '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml'
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file))
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            if str(Kconfig.strategy) == 'extend':
                copy_out = []
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         (cur_iter + 1), idx)
                    ]
                sim_task.copy_output_data = copy_out
                #if Kconfig.ndx_file is not None:
                #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = [
                'module swap PrgEnv-cray PrgEnv-gnu', 'module load bwpy/1.2.5',
                'module add bwpy-mpi', 'module add fftw/3.3.4.10',
                'module add cray-netcdf',
                'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
                'module add cmake/3.1.3', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
                'export tasks=tica_msm_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py', '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
Exemplo n.º 56
def generate_pipeline():
    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE

        CUR_NEW_STAGE += 1

        s = Stage()

        for i in range(10):
            t = Task()
            t.executable = ['sleep']
            t.arguments = ['30']

            s.add_tasks(t)

        # Add post-exec to the Stage
        s.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }

        p.add_stages(s)

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):

        t1 = Task()
        t1.executable = ['sleep']
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
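
The adaptive pattern above keeps appending stages through the on_true callback until CUR_NEW_STAGE exceeds MAX_NEW_STAGE. A minimal driver for it could look like the sketch below; the global counters and the resource description are illustrative assumptions, not the original run script.

import os

from radical.entk import AppManager

# Globals consulted by the post_exec callbacks above (illustrative values).
CUR_NEW_STAGE = 0
MAX_NEW_STAGE = 4

if __name__ == '__main__':

    # RabbitMQ endpoint, following the convention used in the other examples.
    hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
    port = int(os.environ.get('RMQ_PORT', 5672))

    appman = AppManager(hostname=hostname, port=port)

    # Assumed resource request; adjust to the target machine.
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 15,
                            'cpus': 2}

    appman.workflow = set([generate_pipeline()])
    appman.run()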
Exemplo n.º 57
        for cnt in range(10):
            p = Pipeline()

            #Create the stages in a pipeline
            for cnt2 in range(12):
                # Create a Stage object
                s = Stage()
                # Create a Task object
                t = Task()
                if cnt2 % 2 == 0:
                    t.name = 'a %s' % (cnt2 + 1)
                else:
                    t.name = 'b %s' % (cnt2 + 1)

                #The task stresses one CPU core for 100 seconds using stress-ng
                t.executable = '$HOME/tem/stress-ng'  # Assign executable to the task
                t.arguments = ['-c', '1', '-t', '100']
                #t.arguments = ['100']  # Assign arguments for the task executable
                # Add the Task to the Stage
                s.add_tasks(t)
                # Add Stage to the Pipeline
                p.add_stages(s)
            Pipelines.append(p)

        # Create Application Manager
        appman = AppManager(hostname=hostname,
                            port=port,
                            autoterminate=False,
                            username=username,
                            password=password)
        # Create a dictionary describing the four mandatory keys:
Exemplo n.º 58
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.name = 't1'
    t1.executable = '/bin/bash'
    t1.arguments = [
        '-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()
    s2.name = 's2'
    s2_task_uids = []

    for cnt in range(30):

        # Create a Task object
        t2 = Task()
        t2.name = 't%s' % (cnt + 1)
        t2.executable = '/bin/bash'
        t2.arguments = [
            '-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt'
        ]
        # Copy data from the task in the first stage to the current task's location
        t2.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt' %
            (p.name, s1.name, t1.name)
        ]

        # Add the Task to the Stage
        s2.add_tasks(t2)
        s2_task_uids.append(t2.name)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()
    s3.name = 's3'

    for cnt in range(30):

        # Create a Task object
        t3 = Task()
        t3.name = 't%s' % (cnt + 1)
        t3.executable = '/bin/bash'
        t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
        # Copy data from the task in the first stage to the current task's location
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' %
            (p.name, s2.name, s2_task_uids[cnt])
        ]
        # Download the output of the current task to the current location
        t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

        # Add the Task to the Stage
        s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
Exemplo n.º 59
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #for non-zero iterations it is assumed that the files are already in combined_path
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]
    try:
        systemxml = str(Kconfig.systemxml)
    except:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except:
        integratorxml = 'integrator-5.xml'
    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)
    if cur_iter == 0:
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        copy_arr = [
            '$SHARED/%s > %s/%s' % (config_file, combined_path, config_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)
        ]
        if Kconfig.md_run_file != Kconfig.md_reference:
            copy_arr = copy_arr + [
                '$SHARED/%s > %s/%s' %
                (Kconfig.md_reference, combined_path, Kconfig.md_reference)
            ]
        if str(Kconfig.strategy) != 'extend':
            copy_arr = copy_arr + [
                '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana)
            ]
        print("copy_arr", copy_arr)
        pre_proc_task2.copy_input_data = copy_arr
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

        # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    print("finished prep")
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        while (num_allocated_rep < num_replicas):
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
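            # Example with illustrative numbers: num_replicas=10 and
            # num_parallel=4 give ceil(10/4)=3 for the first task, then
            # ceil(7/3)=3, ceil(4/2)=2 and ceil(2/1)=2, i.e. tasks of size
            # 3, 3, 2 and 2, a slightly more even split than a fixed chunk size.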
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--Kconfig', config_file, '--idxstart',
                str(num_allocated_rep), '--idxend',
                str(num_allocated_rep + use_replicas), '--path', combined_path,
                '>', 'md.log'
            ]
            #'--trajstride', str(Kconfig.trajstride),'--Kconfig', str(args.Kconfig),
            #'--idxstart',str(num_allocated_rep), '--idxend',str((num_allocated_rep+use_replicas)),
            #'--path',combined_path,'--iter',str(cur_iter),
            #'--md_steps',str(Kconfig.md_steps), '--save_traj', 'True','>', 'md.log']
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #  for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##     #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #     copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                pre_exec_arr = ana_settings
                ana_task.pre_exec = pre_exec_arr
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 20,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                    wf.uid, sim_stage.uid, ana_task.uid)
                sim_stage.add_tasks(ana_task)
        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
Exemplo n.º 60
# If RabbitMQ runs in a Docker container or a separate VM, set "RMQ_HOSTNAME"
# and "RMQ_PORT" in the session where you are running this script.
hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
port = os.environ.get('RMQ_PORT', 5672)

if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object 
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()    
    t1.executable = ['/bin/bash']   
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt'] 

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)


    # Create another Stage object
    s2 = Stage()
    s2.name = 'Stage 2'

    # Create a Task object
    t2 = Task()