def test_pipeline_decrement_stage():
    """Check the stage pointer and completed flag after ``_decrement_stage``."""
    pipeline = Pipeline()

    # Two stages, each holding a single '/bin/date' task.
    stage_list = []
    for _ in range(2):
        stage = Stage()
        task = Task()
        task.executable = ['/bin/date']
        stage.tasks = task
        stage_list.append(stage)
    pipeline.add_stages(stage_list)

    # Step forward twice; the completed flag is expected to be set.
    pipeline._increment_stage()
    pipeline._increment_stage()
    assert pipeline._stage_count == 2
    assert pipeline._cur_stage == 2
    assert pipeline._completed_flag.is_set() == True

    # Each decrement moves the pointer back by one and leaves the flag unset.
    for expected_cur_stage in (1, 0):
        pipeline._decrement_stage()
        assert pipeline._stage_count == 2
        assert pipeline._cur_stage == expected_cur_stage
        assert pipeline._completed_flag.is_set() == False
def test_stage_task_addition():
    """Check ``Stage.add_tasks`` with a set and with a list of tasks."""
    # First pass hands over a set, second pass a list; the expectations are
    # the same in both cases.
    for container in (set, list):
        stage = Stage()

        first = Task()
        first.executable = ['/bin/date']
        second = Task()
        second.executable = ['/bin/date']

        stage.add_tasks(container([first, second]))

        assert type(stage.tasks) == set
        assert stage._task_count == 2
        assert first in stage.tasks
        assert second in stage.tasks
def get_pipeline(shared_fs=False, size=1):
    """Build a two-stage pipeline of ``dd`` tasks.

    Stage 's1' writes ``size`` MB of random data per task; stage 's2' copies
    each of those files to a second file.

    :param shared_fs: if true, files live under '/home/vivek91', otherwise
                      under '$NODE_LFS_PATH'
    :param size: block size in MB passed to ``dd`` as ``bs``
    :return: the assembled Pipeline
    """
    cpu_settings = (
        ('processes', 1),
        ('threads_per_process', 24),
        ('thread_type', ''),
        ('process_type', ''),
    )
    data_dir = '/home/vivek91' if shared_fs else '$NODE_LFS_PATH'
    block_size = 'bs=%sM' % size
    n_tasks = 4

    pipeline = Pipeline()
    pipeline.name = 'p'

    # Stage 1: dd if=/dev/urandom bs=<chunk size> count=<chunks> of=<output>
    writers = Stage()
    writers.name = 's1'
    for idx in range(n_tasks):
        task = Task()
        task.name = 't%s' % idx
        task.executable = 'dd'
        task.arguments = ['if=/dev/urandom',
                          block_size,
                          'count=1',
                          'of=%s/s1_t%s.txt' % (data_dir, idx)]
        for key, value in cpu_settings:
            task.cpu_reqs[key] = value
        task.lfs_per_process = 1024
        writers.add_tasks(task)
    pipeline.add_stages(writers)

    # Stage 2: copy each stage-1 output; the tag carries the name of the
    # matching stage-1 task.
    copiers = Stage()
    copiers.name = 's2'
    for idx in range(n_tasks):
        task = Task()
        task.executable = ['dd']
        task.arguments = ['if=%s/s1_t%s.txt' % (data_dir, idx),
                          block_size,
                          'count=1',
                          'of=%s/s2_t%s.txt' % (data_dir, idx)]
        for key, value in cpu_settings:
            task.cpu_reqs[key] = value
        task.tag = 't%s' % idx
        copiers.add_tasks(task)
    pipeline.add_stages(copiers)

    return pipeline
def generate_pipeline():
    """Build a three-stage pipeline: generate data, count characters, checksum.

    Relies on the module-level name ``cnt`` for the downloaded file name.

    :return: the assembled Pipeline
    """
    pipeline = Pipeline()

    # Stage 1: write 1000000 bytes of base64-encoded random data to output.txt
    gen_stage = Stage()
    gen_task = Task()
    gen_task.executable = '/bin/bash'
    gen_task.arguments = ['-l', '-c',
                          'base64 /dev/urandom | head -c 1000000 > output.txt']
    gen_stage.add_tasks(gen_task)
    pipeline.add_stages(gen_stage)

    # Stage 2: per-character counts of output.txt
    count_stage = Stage()
    count_task = Task()
    count_task.executable = '/bin/bash'
    count_task.arguments = ['-l', '-c',
                            'grep -o . output.txt | sort | uniq -c > ccount.txt']
    # Copy output.txt from the stage-1 task into this task's location
    count_task.copy_input_data = [
        '$Pipline_%s_Stage_%s_Task_%s/output.txt'
        % (pipeline.uid, gen_stage.uid, gen_task.uid)
    ]
    count_stage.add_tasks(count_task)
    pipeline.add_stages(count_stage)

    # Stage 3: checksum of the character counts
    sum_stage = Stage()
    sum_task = Task()
    sum_task.executable = '/bin/bash'
    sum_task.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
    # Copy ccount.txt from the stage-2 task into this task's location
    sum_task.copy_input_data = [
        '$Pipline_%s_Stage_%s_Task_%s/ccount.txt'
        % (pipeline.uid, count_stage.uid, count_task.uid)
    ]
    # Download the checksum file to the current location
    sum_task.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]
    sum_stage.add_tasks(sum_task)
    pipeline.add_stages(sum_stage)

    return pipeline
def test_stage_check_complete():
    """A stage reports complete only once its tasks are set to DONE."""
    stage = Stage()

    task_pair = []
    for _ in range(2):
        task = Task()
        task.executable = ['/bin/date']
        task_pair.append(task)
    stage.add_tasks(task_pair)

    assert stage._check_stage_complete() == False

    stage._set_tasks_state(states.DONE)
    assert stage._check_stage_complete() == True
def test_stage_set_tasks_state():
    """``_set_tasks_state`` rejects the value 2 and applies DONE to every task."""
    stage = Stage()

    first = Task()
    first.executable = ['/bin/date']
    second = Task()
    second.executable = ['/bin/date']
    stage.add_tasks([first, second])

    # The integer 2 is expected to be refused with a ValueError
    with pytest.raises(ValueError):
        stage._set_tasks_state(2)

    stage._set_tasks_state(states.DONE)
    for task in (first, second):
        assert task.state == states.DONE
def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):
    """Publish a batch of tasks and wait for them to come back as DONE.

    Sends 16 tasks (state SCHEDULING, executable '/bin/echo') as one JSON
    message to ``pending_queue``, then polls ``completed_queue`` until 15
    messages carrying a DONE task have been received.

    :param mq_hostname: RabbitMQ host to connect to
    :param port: RabbitMQ port
    :param pending_queue: name of the queue the task batch is published to
    :param completed_queue: name of the queue polled for finished tasks
    """
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=mq_hostname, port=port))

    # Close the connection even if publishing or polling raises.
    try:
        mq_channel = mq_connection.channel()

        tasks = list()
        for _ in range(16):
            t = Task()
            t.state = states.SCHEDULING
            t.executable = '/bin/echo'
            tasks.append(t.to_dict())

        tasks_as_json = json.dumps(tasks)
        mq_channel.basic_publish(exchange='',
                                 routing_key=pending_queue,
                                 body=tasks_as_json)

        # NOTE(review): 16 tasks are published but the loop stops after 15
        # DONE messages -- confirm whether that is intentional.
        cnt = 0
        while cnt < 15:
            method_frame, props, body = mq_channel.basic_get(queue=completed_queue)
            if body:
                task = Task()
                task.from_dict(json.loads(body))
                if task.state == states.DONE:
                    cnt += 1
                # Only acknowledge when a message was actually delivered;
                # method_frame is None otherwise.
                mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)
    finally:
        mq_connection.close()
def generate_pipeline(name, stages):
    """Build a pipeline of ``stages`` stages with five '/bin/echo' tasks each.

    :param name: name assigned to the pipeline and echoed by every task
    :param stages: number of stages to create
    :return: the assembled Pipeline
    """
    pipeline = Pipeline()
    pipeline.name = name

    for stage_idx in range(stages):
        stage = Stage()
        stage.name = 'Stage %s' % stage_idx

        for task_idx in range(5):
            task = Task()
            task.name = 'my-task'           # optional task name
            task.executable = '/bin/echo'
            # Message identifies the task, its stage and its pipeline
            task.arguments = ['I am task %s in %s in %s' % (task_idx, stage_idx, name)]
            stage.add_tasks(task)

        pipeline.add_stages(stage)

    return pipeline
def test_wfp_check_processor():
    """``check_processor`` is truthy after start and falsy after terminate."""
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    # The processor reuses the session id and queues set up by the Amgr
    wfp = WFprocessor(
        sid=amgr._sid,
        workflow=[pipeline],
        pending_queue=amgr._pending_queue,
        completed_queue=amgr._completed_queue,
        mq_hostname=amgr._mq_hostname,
        port=amgr._port,
        resubmit_failed=False,
    )

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_wfp_initialization(s, i, b, l):
    """Check WFprocessor attributes after construction.

    A first instance is built from fixed arguments and its private attributes
    are inspected. A second instance is built from the supplied arguments
    whenever ``s`` is not a ``unicode`` object.

    :param s: value used for ``sid`` and ``mq_hostname`` of the second instance
    :param i: value used for ``port``
    :param b: value used for ``resubmit_failed``
    :param l: value used for both queue arguments
    """
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    wfp = WFprocessor(
        sid='rp.session.local.0000',
        workflow=set([pipeline]),
        pending_queue=['pending'],
        completed_queue=['completed'],
        mq_hostname=hostname,
        port=port,
        resubmit_failed=True,
    )

    # The uid is expected to look like 'wfprocessor.<something>'
    uid_parts = wfp._uid.split('.')
    assert len(uid_parts) == 2
    assert 'wfprocessor' == uid_parts[0]

    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._mq_hostname == hostname
    assert wfp._port == port
    assert wfp._wfp_process == None
    assert wfp._workflow == set([pipeline])

    if not isinstance(s, unicode):
        wfp = WFprocessor(
            sid=s,
            workflow=set([pipeline]),
            pending_queue=l,
            completed_queue=l,
            mq_hostname=s,
            port=i,
            resubmit_failed=b,
        )
def create_single_task():
    """Return a task named 'simulation' that runs '/bin/date' with no staging."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/date']
    # No data is copied in or out
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def test_pipeline_stage_addition():
    """Check pipeline bookkeeping after adding two stages via ``add_stages``."""
    pipeline = Pipeline()

    # Two stages, each holding a single '/bin/date' task
    added = []
    for _ in range(2):
        stage = Stage()
        task = Task()
        task.executable = ['/bin/date']
        stage.tasks = task
        added.append(stage)
    pipeline.add_stages(added)

    assert type(pipeline.stages) == list
    assert pipeline._stage_count == 2
    assert pipeline._cur_stage == 1
    # Stages are expected in insertion order
    assert pipeline.stages[0] == added[0]
    assert pipeline.stages[1] == added[1]
def create_single_task():
    """Return a task named 'simulation' that runs ``/bin/echo hello``."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/echo']
    task.arguments = ['hello']
    # No data is copied in or out
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def test_wfp_workflow_incomplete():
    """``workflow_incomplete`` is truthy at first and falsy after the dequeue run."""
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(
        sid=amgr._sid,
        workflow=[pipeline],
        pending_queue=amgr._pending_queue,
        completed_queue=amgr._completed_queue,
        mq_hostname=amgr._mq_hostname,
        port=amgr._port,
        resubmit_failed=False,
    )
    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [pipeline]
    profiler = ru.Profiler(name='radical.entk.temp')

    # NOTE(review): the next two lines are comparisons, not assignments, so
    # they do not change any state -- confirm whether '=' was intended.
    pipeline.stages[0].state == states.SCHEDULING
    pipeline.state == states.SCHEDULED

    for task in pipeline.stages[0].tasks:
        task.state = states.COMPLETED

    import json
    import pika

    # Publish the (last) task, serialized as JSON, on the first completed queue
    task_as_dict = json.dumps(task.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    # Run the Amgr synchronizer in a thread while the dequeue helper runs in
    # a separate process.
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def test_pipeline_stage_assignment():
    """Check pipeline bookkeeping after assigning one stage to ``stages``."""
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.tasks = task

    # Direct assignment of a single Stage object
    pipeline.stages = stage

    assert type(pipeline.stages) == list
    assert pipeline._stage_count == 1
    assert pipeline._cur_stage == 1
    assert pipeline.stages[0] == stage
def test_stage_task_assignment():
    """
    ***Purpose***: Test if necessary attributes are automatically updated upon
    task assignment
    """
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']

    # Direct assignment of a single Task object
    stage.tasks = task

    assert type(stage.tasks) == set
    assert stage._task_count == 1
    assert task in stage.tasks
def test_amgr_synchronizer():
    """Check that state changes made in a child process reach the Amgr's objects.

    The synchronizer runs in a thread while ``func_for_synchronizer_test``
    runs in a separate process; afterwards every task, the stage and the
    pipeline are expected to be in SCHEDULING.
    """
    logger = ru.Logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    pipeline = Pipeline()
    stage = Stage()

    # Create and add 100 tasks to the stage
    for idx in range(100):
        task = Task()
        task.executable = ['some-executable-%s' % idx]
        stage.add_tasks(task)

    pipeline.add_stages(stage)
    pipeline._assign_uid(amgr._sid)
    pipeline._validate()

    amgr.workflow = [pipeline]

    # Everything starts out in INITIAL
    for task in pipeline.stages[0].tasks:
        assert task.state == states.INITIAL
    assert pipeline.stages[0].state == states.INITIAL
    assert pipeline.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Run the helper in a separate process and wait for it to finish
    proc = Process(target=func_for_synchronizer_test,
                   name='temp-proc',
                   args=(amgr._sid, pipeline, logger, profiler))
    proc.start()
    proc.join()

    for task in pipeline.stages[0].tasks:
        assert task.state == states.SCHEDULING
    assert pipeline.stages[0].state == states.SCHEDULING
    assert pipeline.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
def generate_pipeline():
    """Build a pipeline whose stages carry a shuffle-on-completion post-exec.

    Creates ``MAX_NEW_STAGE + 1`` stages of ``CUR_TASKS`` '/bin/sleep 30'
    tasks each. Every stage gets a ``post_exec`` dict with the three
    callbacks defined below. Relies on the module-level names
    ``CUR_NEW_STAGE``, ``MAX_NEW_STAGE``, ``CUR_TASKS`` and ``shuffle``.

    :return: the assembled Pipeline
    """

    def func_condition():
        # True while the stage counter has not passed the configured maximum
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        return CUR_NEW_STAGE <= MAX_NEW_STAGE

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        # NOTE(review): slicing a list yields a copy, so this shuffles a
        # temporary and does not reorder p.stages itself -- confirm intent.
        shuffle(p.stages[CUR_NEW_STAGE:])

    def func_on_false():
        # Parenthesized single argument prints identically on Python 2 and 3
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for _ in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for _ in range(CUR_TASKS):
            t1 = Task()
            t1.executable = '/bin/sleep'
            t1.arguments = ['30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage (keys were missing their opening quotes)
        s1.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def create_pipeline():
    """Return a one-stage pipeline whose single task runs ``sleep 10``."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['sleep']
    task.arguments = ['10']

    stage = Stage()
    stage.add_tasks(task)

    pipeline = Pipeline()
    pipeline.add_stages(stage)
    return pipeline
def test_wfp_enqueue():
    """Check object states after the enqueue helper ran in a child process.

    Afterwards the task and stage are expected to be SCHEDULED and the
    pipeline SCHEDULING.
    """
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(
        sid=amgr._sid,
        workflow=[pipeline],
        pending_queue=amgr._pending_queue,
        completed_queue=amgr._completed_queue,
        mq_hostname=amgr._mq_hostname,
        port=amgr._port,
        resubmit_failed=False,
    )
    wfp._initialize_workflow()

    amgr.workflow = [pipeline]
    profiler = ru.Profiler(name='radical.entk.temp')

    # Everything starts out in INITIAL
    for task in pipeline.stages[0].tasks:
        assert task.state == states.INITIAL
    assert pipeline.stages[0].state == states.INITIAL
    assert pipeline.state == states.INITIAL

    # Synchronizer in a thread, enqueue helper in a separate process
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_enqueue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for task in pipeline.stages[0].tasks:
        assert task.state == states.SCHEDULED
    assert pipeline.stages[0].state == states.SCHEDULED
    assert pipeline.state == states.SCHEDULING
def create_pipeline():
    """Return a one-stage pipeline whose single task runs ``/bin/echo hello``."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/echo']
    task.arguments = ['hello']
    # No data is copied in or out
    task.copy_input_data = []
    task.copy_output_data = []

    stage = Stage()
    stage.add_tasks(task)

    pipeline = Pipeline()
    pipeline.add_stages(stage)
    return pipeline
def generate_pipeline():
    """Build a one-stage pipeline that suspends and resumes itself in post-exec.

    The stage holds ten '/bin/sleep 30' tasks. Its ``post_exec`` condition
    suspends the pipeline, sleeps for 10 seconds and returns True; the
    on-true callback then resumes the pipeline.

    :return: the assembled Pipeline
    """
    pipeline = Pipeline()

    def func_condition():
        pipeline.suspend()
        print('Suspending pipeline %s for 10 seconds' % pipeline.uid)
        sleep(10)
        return True

    def func_on_true():
        print('Resuming pipeline %s' % pipeline.uid)
        pipeline.resume()

    def func_on_false():
        pass

    stage = Stage()
    for _ in range(10):
        task = Task()
        task.executable = '/bin/sleep'
        task.arguments = ['30']
        stage.add_tasks(task)

    # Callbacks invoked through the stage's post_exec hook
    stage.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    pipeline.add_stages(stage)
    return pipeline
def generate_pipeline():
    """Return a one-stage pipeline whose single task runs ``/bin/sleep 300``."""
    # Task that sleeps for 300 seconds
    task = Task()
    task.executable = ['/bin/sleep']
    task.arguments = ['300']

    # Stage holding the task
    stage = Stage()
    stage.add_tasks(task)

    # Pipeline holding the stage
    pipeline = Pipeline()
    pipeline.add_stages(stage)
    return pipeline
def generate_pipeline():
    """Return a one-stage pipeline whose task uploads 'temp' and moves it to /tmp/.

    Relies on the module-level name ``cur_dir`` for the upload source.
    """
    # Task that runs `mv temp /tmp/` on the uploaded file
    task = Task()
    task.executable = ['mv']
    task.arguments = ['temp', '/tmp/']
    task.upload_input_data = ['%s/temp' % cur_dir]

    # Stage holding the task
    stage = Stage()
    stage.add_tasks(task)

    # Pipeline holding the stage
    pipeline = Pipeline()
    pipeline.add_stages(stage)
    return pipeline
def test_wfp_initialize_workflow():
    """After ``_initialize_workflow`` pipeline, stage and tasks all have a uid."""
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    wfp = WFprocessor(
        sid='test',
        workflow=[pipeline],
        pending_queue=list(),
        completed_queue=list(),
        mq_hostname=hostname,
        port=port,
        resubmit_failed=False,
    )
    wfp._initialize_workflow()

    first_stage = pipeline.stages[0]
    assert pipeline.uid is not None
    assert first_stage.uid is not None
    for task in first_stage.tasks:
        assert task.uid is not None
def generate_pipeline():
    """Build a one-stage pipeline of ten ``cat`` tasks using shared input files.

    Each task copies file1.txt and file2.txt from '$SHARED' and downloads its
    output.txt as output_<n>.txt (n = 1..10) into ``cur_dir``, a module-level
    name.

    :return: the assembled Pipeline
    """
    pipeline = Pipeline()
    stage = Stage()

    for idx in range(10):
        task = Task()
        task.executable = 'cat'
        task.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        task.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        # Downloaded files are numbered starting at 1
        task.download_output_data = ['output.txt > %s/output_%s.txt' % (cur_dir, idx + 1)]

        stage.add_tasks(task)

    pipeline.add_stages(stage)
    return pipeline
def func_on_true():
    """Append one more stage of ten '/bin/sleep 30' tasks to the pipeline ``p``.

    Increments the global ``CUR_NEW_STAGE`` and gives the new stage the same
    post-exec callbacks, so the check repeats when that stage finishes.
    Uses ``p``, ``func_condition`` and ``func_on_false`` from the enclosing
    or module scope.
    """
    global CUR_NEW_STAGE
    CUR_NEW_STAGE += 1

    new_stage = Stage()
    for _ in range(10):
        task = Task()
        task.executable = '/bin/sleep'
        task.arguments = ['30']
        new_stage.add_tasks(task)

    # Add post-exec to the Stage
    new_stage.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    p.add_stages(new_stage)
def generate_pipeline():
    """Return pipeline 'p1' with stage 's1' and one task 't1' running /bin/false."""
    # Task that runs '/bin/false'
    task = Task()
    task.name = 't1'
    task.executable = ['/bin/false']

    # Stage holding the task
    stage = Stage()
    stage.name = 's1'
    stage.add_tasks(task)

    # Pipeline holding the stage
    pipeline = Pipeline()
    pipeline.name = 'p1'
    pipeline.add_stages(stage)
    return pipeline
def generate_pipeline():
    """Return pipeline 'p1' with one stage 's1' of 4096 '/bin/echo' tasks."""
    pipeline = Pipeline()
    pipeline.name = 'p1'

    stage = Stage()
    stage.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    n_tasks = 4096
    for _ in range(n_tasks):
        task = Task()
        task.executable = ['/bin/echo']
        task.arguments = ['"Hello World"']
        stage.add_tasks(task)

    pipeline.add_stages(stage)
    return pipeline
def test_amgr_run_mock():
    """Run a single-task workflow through an Amgr created with ``rts="mock"``."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/date']

    stage = Stage()
    stage.tasks = task

    pipeline = Pipeline()
    pipeline.add_stages(stage)

    # Resource description handed to the application manager
    resource_description = {
        'resource': 'local.localhost',
        'walltime': 5,
        'cpus': 1,
        'project': ''
    }

    appman = Amgr(hostname=hostname, port=port, rts="mock")
    appman.resource_desc = resource_description
    appman.workflow = [pipeline]
    appman.run()
def test_rp_da_scheduler_bw():
    """
    **Purpose**: Run an EnTK application on the 'ncsa.bw_aprun' resource and
    compare the hostnames recorded by matching tasks of two stages.

    Stage 's1' runs ten named '/bin/hostname' tasks; stage 's2' runs ten more,
    each tagged with the name of the stage-1 task of the same index. After the
    run, the first line of every s1 hostname file must equal the first line of
    the s2 file with the same index. All '*.txt' files in the current working
    directory are removed at the end.
    """

    p1 = Pipeline()
    p1.name = 'p1'

    n = 10

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s' % x
        t.executable = ['/bin/hostname']
        t.arguments = ['>', 'hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 10
        t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt' % (x)]

        s1.add_tasks(t)

    p1.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['/bin/hostname']
        t.arguments = ['>', 'hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt' % (x)]
        # Tag refers to the stage-1 task with the same index
        t.tag = 't%s' % x

        s2.add_tasks(t)

    p1.add_stages(s2)

    res_dict = {
        'resource': 'ncsa.bw_aprun',
        'walltime': 10,
        'cpus': 128,
        'project': 'gk4',
        'queue': 'high'
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()

    # Context managers make sure both files are closed again; the original
    # left the handles open.
    for i in range(n):
        with open('s1_t%s_hostname.txt' % i, 'r') as s1_file, \
                open('s2_t%s_hostname.txt' % i, 'r') as s2_file:
            assert s1_file.readline().strip() == s2_file.readline().strip()

    # Clean up the downloaded files
    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
def create_windowing_stage(cmt_file_db, param_path, task_counter):
    """This function creates the ASDF windowing stage(s).

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: running task number used in the stdout/stderr file
                         names; incremented once per created task
    :return: tuple ``(stages, task_counter)`` -- a list of EnTK Stages (one
             for path files without "surface" in their name, one for those
             with it; empty groups are skipped) and the updated counter
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Windowing parameter file directory
    window_process_dir = os.path.join(param_path, "CreateWindows")

    # Window path list
    # Important step! This creates a windowing list prior to having created
    # the actual window path files. This way the windowing processes can be
    # distributed for each ASDF file pair on one processor (No MPI support!)
    window_path_list, _ = get_windowing_list(cmt_file_db,
                                             window_process_dir,
                                             verbose=False)

    # Process path function
    window_func = os.path.join(bin_path, "window_selection_asdf.py")

    # Work around the fact that multiple tasks cannot read the same file:
    # create two stages, one for body waves / general entries and one for
    # surface waves.
    bodywave_list = []
    surfacewave_list = []
    for path_file in window_path_list:
        if "surface" in os.path.basename(path_file):
            surfacewave_list.append(path_file)
        else:
            bodywave_list.append(path_file)

    # Keep only non-empty groups, body waves first
    stage_list = [group for group in (bodywave_list, surfacewave_list)
                  if len(group) > 0]

    # List of stages
    stages = []

    for window_list in stage_list:

        # Create a Stage object
        window_stage = Stage()
        window_stage.name = "Windowing"

        # Loop over process path files
        for window_path in window_list:

            # Create Task; it gets the name of the path file
            window_task = Task()
            window_task.name = os.path.basename(window_path)

            window_task.pre_exec = [
                # Conda activate
                DB_params["conda-activate"]
            ]
            window_task.executable = [DB_params["bin-python"]]

            # Create Argument list
            arguments = [window_func, "-f", window_path]
            if DB_params["verbose"]:
                arguments.append("-v")
            window_task.arguments = arguments

            # Log files: <Cdir>/logs/std{out,err}.pipeline_<Cid>.task_<NNNN>.<name>
            log_suffix = "pipeline_%s.task_%s.%s" % (
                Cid, str(task_counter).zfill(4), window_task.name)
            window_task.stdout = os.path.join("%s" % Cdir, "logs",
                                              "stdout." + log_suffix)
            window_task.stderr = os.path.join("%s" % Cdir, "logs",
                                              "stderr." + log_suffix)

            window_stage.add_tasks(window_task)

            task_counter += 1

        stages.append(window_stage)

    return stages, task_counter
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):
    """
    Build the pipeline for any cycle after the initial one.

    Reads 'exchangePairs_<Cycle>.dat', creates one MD stage with ``Replicas``
    AMBER tasks and one exchange stage with a single task, records the MD
    task placeholders in ``self.Book`` and returns the pipeline.

    :param Replicas: number of replicas (MD tasks)
    :param Replica_Cores: cores passed to each AMBERTask
    :param Cycle: index of the cycle being built
    :param MD_Executable: executable passed to each AMBERTask
    :param ExchangeMethod: file uploaded as input of the exchange task
    :return: the assembled Pipeline
    """
    self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)

    # Second whitespace-separated column of every line of the pairs file
    with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as pairs_file:
        ExchangeArray = [int(row.split()[1]) for row in pairs_file]

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(Cycle)

    # Bookkeeping: replica index -> placeholder of its MD task in this cycle
    md_dict = dict()

    # MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(Cycle)

    self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=Cycle)

        # Restart file comes from the exchange partner in the previous cycle;
        # topology and input file come from this replica's entry of cycle 0.
        restart_source = self.Book[Cycle - 1][ExchangeArray[r]]
        initial_source = self.Book[0][r]
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (restart_source),
            '%s/prmtop' % (initial_source),
            '%s/mdin_{0}'.format(r) % (initial_source)
        ]

        md_tsk.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(r),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            '-o', 'out_{0}'.format(r),
            '-inf', 'mdinfo_{0}'.format(r)
        ]

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            q.name, md_stg.name, md_tsk.name)
        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    # Exchange stage with a single task
    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(Cycle + 1)

    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]

    # Link the mdinfo file of every replica's MD task
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]

    ex_tsk.arguments = [
        'TempEx.py',
        '{0}'.format(Replicas),
        '{0}'.format(Cycle + 1)
    ]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # The pairs file for the next cycle is downloaded from this task
    ex_tsk.download_output_data = [
        'exchangePairs_{0}.dat'.format(Cycle + 1)
    ]

    ex_stg.add_tasks(ex_tsk)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)

    self.Book.append(md_dict)

    return q
def main(cmt_filename):
    '''Build and run the EnTK workflow for one CMT solution.

    The pipeline has three stages: create the database entry, write the
    sources, and run the specfem simulation. It is executed through an
    AppManager on the 'princeton.tiger_gpu' resource.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      supposed to be inverted for

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>

    '''

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters: Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)

    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [
        # Conda activate
        DB_params["conda-activate"]
    ]
    t1.executable = [DB_params['bin-python']]  # Assign executable to the task
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # # ---- REQUEST DATA --------------------------------------------------- #
    #
    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [
        # Conda activate
        DB_params["conda-activate"]
    ]
    w_sources_t.executable = [DB_params['bin-python']]
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ----------------------------------------------------- #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(param_path,
                                         "SpecfemParams/"
                                         "CompilersAndModules.yml")

    # Load Parameters (the specfem spec file is still read so a missing or
    # malformed file keeps failing here, although its content is not used)
    read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    # Only the first simulation directory is run. The original looped over
    # ``attr[0]``, i.e. over the characters 'C', 'M', 'T' of the string.
    # NOTE(review): switch to ``attr`` if all ten simulations should run.
    for at in attr[:1]:
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module Loading
        sf_t.pre_exec = [
            # Get rid of existing modules
            'module purge'
        ]
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append("cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Resource request description
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application
    # Manager. Note: the list order is not guaranteed to be preserved.
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
# Replica-exchange setup: one core per replica, pilot sized to hold them all
Replicas = 24
Replica_Cores = 1
Pilot_Cores = Replicas * Replica_Cores

for N_Stg in range(Stages):
    stg = Stage()  # initialization
    task_uids['Stage_%s' % N_Stg] = list()

    # Initial MD stage: one sander.MPI task per replica
    if N_Stg == 0:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  # MD Engine
            t.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
            t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
            t.arguments = ['-O', '-i', 'mdin_{0}'.format(n0),
                           '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out']
            t.cores = Replica_Cores
            stg.add_tasks(t)
            task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    # Exchange stages: odd-numbered stages. The original condition used '='
    # (assignment), which is a syntax error; '==' is the comparison.
    elif N_Stg != 0 and N_Stg % 2 == 1:
        t = Task()
        t.executable = ['python']
def test_wfp_dequeue():
    """Check object states after the dequeue helper ran in a child process.

    A task marked COMPLETED is published on the first completed queue;
    afterwards the task, the stage and the pipeline are expected to be DONE.
    """
    pipeline = Pipeline()
    stage = Stage()
    task = Task()
    task.executable = ['/bin/date']
    stage.add_tasks(task)
    pipeline.add_stages(stage)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(
        sid=amgr._sid,
        workflow=[pipeline],
        pending_queue=amgr._pending_queue,
        completed_queue=amgr._completed_queue,
        mq_hostname=amgr._mq_hostname,
        port=amgr._port,
        resubmit_failed=False,
    )
    wfp._initialize_workflow()

    amgr.workflow = [pipeline]
    profiler = ru.Profiler(name='radical.entk.temp')

    # Everything starts out in INITIAL
    assert pipeline.stages[0].state == states.INITIAL
    assert pipeline.state == states.INITIAL
    for task in pipeline.stages[0].tasks:
        assert task.state == states.INITIAL

    # NOTE(review): the next two lines are comparisons, not assignments, so
    # they do not change any state -- confirm whether '=' was intended.
    pipeline.stages[0].state == states.SCHEDULING
    pipeline.state == states.SCHEDULED

    for task in pipeline.stages[0].tasks:
        task.state = states.COMPLETED

    import json
    import pika

    # Publish the (last) task, serialized as JSON, on the first completed queue
    task_as_dict = json.dumps(task.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    # Synchronizer in a thread, dequeue helper in a separate process
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp, ))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for task in pipeline.stages[0].tasks:
        assert task.state == states.DONE
    assert pipeline.stages[0].state == states.DONE
    assert pipeline.state == states.DONE
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle

    Builds a two-stage pipeline: an MD stage with one task per replica,
    followed by an exchange stage with a single task.

    Replicas:       number of replicas (one MD task each)
    Replica_Cores:  cores assigned to every MD task
    Cycles:         index of this cycle; the MD tasks link their inputs from
                    the bookkeeping entry of the previous cycle,
                    Book[Cycles-1]
    MD_Executable:  executable of the MD tasks
    ExchangeMethod: currently unused here; the exchange script path is
                    hard-coded (NOTE(review): confirm whether it should be
                    uploaded instead, as done for the initial cycle)

    Reads 'exchangePairs.dat' from the current directory and appends this
    cycle's bookkeeping dict to the module-level `Book`. Returns the new
    Pipeline.
    """

    # Read exchangePairs.dat: the second column of line r is the replica
    # whose restart file replica r continues from
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = [int(line.split()[1]) for line in f]

    q = Pipeline()

    # Bookkeeping: replica index -> reference to its MD task in this cycle
    md_dict = dict()

    # Create MD stage
    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  # MD Engine, Blue Waters
        # The original indexed Book with `Cycle` (this function itself),
        # which cannot work; the cycle index parameter is `Cycles`.
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
                                  '%s/prmtop' % (Book[Cycles - 1][r]),
                                  '%s/mdin' % (Book[Cycles - 1][r])]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # The task belongs to pipeline `q` (the original referenced an
        # undefined `p` here)
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
    q.add_stages(md_stg)

    # Create Exchange Task
    ex_stg = Stage()
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # Link the mdinfo file of every MD task created above (the original
        # referenced an undefined `d` instead of `md_dict`)
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    q.add_stages(ex_stg)

    Book.append(md_dict)
    return q
for i in range(ntasks): t = Task() t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 4, 'thread_type': 'OpenMP' } t.gpu_reqs = { 'processes': 0, 'process_type': None, 'threads_per_process': 0, 'thread_type': None } t.executable = PYTHON t.arguments = [ f'{current_dir}/simulation.py', f'{run_dir}/simulations/all', ADIOS_XML, i, aggregator_dir ] s.add_tasks(t) t = Task() t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 4, 'thread_type': 'OpenMP' } t.gpu_reqs = { 'processes': 0,
def generate_pipeline():
    """
    Build a pipeline of MAX_NEW_STAGE + 1 identical stress-ng stages.

    Every stage holds CUR_TASKS tasks and carries a post-exec hook: while
    CUR_NEW_STAGE < MAX_NEW_STAGE - 1 the hook advances CUR_NEW_STAGE and
    re-assigns the arguments of the tasks of that stage with a random core
    count; otherwise it prints 'Done'.

    Reads the module-level CUR_TASKS, CUR_CORES, duration, MAX_NEW_STAGE and
    reads/updates CUR_NEW_STAGE. Returns the Pipeline.
    """

    global CUR_TASKS, CUR_CORES, duration, MAX_NEW_STAGE

    def func_condition():
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        return CUR_NEW_STAGE < MAX_NEW_STAGE - 1

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        # Give every task of the newly selected stage a random core count
        for t in p.stages[CUR_NEW_STAGE].tasks:
            cores = randint(1, 20)
            t.arguments = ['-c', str(cores), '-t', str(duration)]

    def func_on_false():
        # Parenthesised form works on both Python 2 and Python 3
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for _ in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):
            t1 = Task()
            t1.pre_exec = [
                'export PATH=/u/sciteam/balasubr/modules/stress-ng-0.09.34:$PATH'
            ]
            t1.executable = ['stress-ng']
            t1.arguments = ['-c', str(CUR_CORES), '-t', str(duration)]
            t1.cpu_reqs = {
                'processes': 1,
                'process_type': '',
                'threads_per_process': CUR_CORES,
                'thread_type': ''
            }

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
if __name__ == '__main__':

    # The pipeline that will hold every stage of this example
    p = Pipeline()

    # First stage, together with a record of the uids of its tasks
    s1 = Stage()
    s1_task_uids = []

    for cnt in range(10):

        # One echo task per iteration
        t = Task()
        t.executable = '/bin/echo'
        t.arguments = ['I am task %s in %s' % (cnt, s1.name)]

        # Register the task with the stage, then remember its uid
        s1.add_tasks(t)
        s1_task_uids.append(t.uid)

    # The first stage is complete: attach it to the pipeline
    p.add_stages(s1)

    # Second stage
    s2 = Stage()
def generate_pipeline(self):
    """
    Build the six-stage pipeline and return it.

    Stages 1-2 hold one task per Gibbs step (self.n_gibbs_steps), stages
    3-5 one task per replica (self.number_of_replicas) and stage 6 one task
    per (replica, thermo state) pair (self.thermo_state). Every task uses
    self.cores cores.

    NOTE(review): the task names (assign_replica_numbers, repex, rotation)
    and NULL are bare identifiers that are not defined in this method; they
    must exist at module level or this raises NameError - confirm whether
    string literals were intended. The translation, propagation and
    energy-matrix stages also reuse the name `rotation`, which looks like a
    copy-paste leftover.
    """

    pipeline = Pipeline()

    # generate replicas
    # create a wrapper task that assigns the values of replica_i and replica_j
    # =================
    stage_1 = Stage()
    for _gibbs_step in range(self.n_gibbs_steps):
        task = Task()
        # assign replica_i and replica_j
        task.name = assign_replica_numbers
        task.executable = [NULL]
        task.cores = self.cores
        task.arguments = ['I am task %s' % _gibbs_step]
        stage_1.add_tasks(task)
    pipeline.add_stages(stage_1)

    # replica exchange Metropolis criteria
    # invoke repex from RepEx 3.0
    # =================
    stage_2 = Stage()
    for _gibbs_step in range(self.n_gibbs_steps):
        task = Task()
        task.name = repex
        task.executable = [NULL]
        task.cores = self.cores
        task.arguments = ['I am task %s' % _gibbs_step]
        stage_2.add_tasks(task)
    pipeline.add_stages(stage_2)

    # rotation (MC)
    # =================
    stage_3 = Stage()
    for replica in range(self.number_of_replicas):
        task = Task()
        task.name = rotation
        task.executable = [NULL]
        task.cores = self.cores
        task.arguments = ['I am task %s' % replica]
        stage_3.add_tasks(task)
    pipeline.add_stages(stage_3)

    # translation (MC)
    # =================
    stage_4 = Stage()
    for replica in range(self.number_of_replicas):
        task = Task()
        task.name = rotation
        task.executable = [NULL]
        task.cores = self.cores
        task.arguments = ['I am task %s' % replica]
        stage_4.add_tasks(task)
    pipeline.add_stages(stage_4)

    # propagation (MC)
    # =================
    stage_5 = Stage()
    for replica in range(self.number_of_replicas):
        task = Task()
        task.name = rotation
        task.executable = [NULL]
        task.cores = self.cores
        task.arguments = ['I am task %s' % replica]
        stage_5.add_tasks(task)
    pipeline.add_stages(stage_5)

    # energy matrix
    # for every replica pull the sampler state
    # compute the energy matrix of each thermo state in thermo_matrix,
    # given that replica's sampler state
    # =================
    stage_6 = Stage()
    for replica in range(self.number_of_replicas):
        # The original was missing the colon on this line (syntax error)
        for thermo_state in range(self.thermo_state):
            task = Task()
            task.name = rotation
            task.executable = [NULL]
            task.cores = self.cores
            task.arguments = ['I am task %s' % replica]
            stage_6.add_tasks(task)
    pipeline.add_stages(stage_6)

    # Single pre-formatted string: prints the same text on Python 2 and 3
    print('TIES pipeline has %s stages. Tasks counts: %s'
          % (len(pipeline.stages), [len(s.tasks) for s in pipeline.stages]))

    return pipeline
def generate_MD_stage(num_MD=1):
    """
    Function to generate MD stage.

    Creates `num_MD` OpenMM tasks. If '<outlier_path>/restart_points.json'
    exists, task i restarts from entry i of that list (a '.pdb' file or a
    '.chk' checkpoint); tasks beyond the end of the list, or all tasks when
    the file is absent, start from `pdb_file`.

    Relies on module-level settings defined outside this function:
    outlier_path, conda_path, base_path, md_path, top_file, pdb_file,
    LEN_initial and LEN_iter. Returns the Stage.
    """
    s1 = Stage()
    s1.name = 'MD'

    initial_MD = True
    outlier_list = []  # stays empty when there is no restart file
    outlier_filepath = '%s/restart_points.json' % outlier_path

    if os.path.exists(outlier_filepath):
        initial_MD = False
        # Context manager closes the file even if json.load raises
        with open(outlier_filepath, 'r') as outlier_file:
            outlier_list = json.load(outlier_file)

    # MD tasks
    time_stamp = int(time.time())
    for i in range(num_MD):
        t1 = Task()
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
        t1.pre_exec = [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
        ]
        t1.pre_exec += ['module load cuda/9.1.85']
        t1.pre_exec += ['conda activate %s' % conda_path]
        t1.pre_exec += [
            'export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path
        ]
        t1.pre_exec += ['cd %s' % md_path]
        # Each task works in its own run directory, made unique by the
        # time stamp plus the task index
        t1.pre_exec += [
            'mkdir -p omm_runs_%d && cd omm_runs_%d' %
            (time_stamp + i, time_stamp + i)
        ]
        t1.executable = ['%s/bin/python' % conda_path]  # run_openmm.py
        t1.arguments = ['%s/run_openmm.py' % md_path]
        if top_file:
            t1.arguments += ['--topol', top_file]

        # pick initial point of simulation
        if initial_MD or i >= len(outlier_list):
            t1.arguments += ['--pdb_file', pdb_file]
        elif outlier_list[i].endswith('pdb'):
            t1.arguments += ['--pdb_file', outlier_list[i]]
            t1.pre_exec += ['cp %s ./' % outlier_list[i]]
        elif outlier_list[i].endswith('chk'):
            t1.arguments += ['--pdb_file', pdb_file, '-c', outlier_list[i]]
            t1.pre_exec += ['cp %s ./' % outlier_list[i]]

        # how long to run the simulation
        if initial_MD:
            t1.arguments += ['--length', LEN_initial]
        else:
            t1.arguments += ['--length', LEN_iter]

        # assign hardware the task
        t1.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        t1.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }

        # Add the MD task to the simulating stage
        s1.add_tasks(t1)

    return s1
def generate_ML_stage(num_ML=1):
    """
    Function to generate the learning stage

    Creates `num_ML` tasks; task i trains with latent dimension i + 3 in its
    own 'cvae_runs_<dim>_<timestamp>' directory. The training command itself
    is launched via jsrun as the last pre_exec entry, while the task
    executable is just 'date'. Returns the Stage.
    """
    learning_stage = Stage()
    learning_stage.name = 'learning'

    # learn task
    base_stamp = int(time.time())
    for idx in range(num_ML):
        dim = idx + 3
        cvae_dir = 'cvae_runs_%.2d_%d' % (dim, base_stamp + idx)

        task = Task()
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        task.pre_exec = [
            # module environment
            'module unload prrte',
            'module unload python',
            'module load xl',
            'module load xalt',
            'module load spectrum-mpi',
            'module load cuda',
            'module list',
            # working directory for this run
            'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path,
            'cd %s' % cvae_path,
            'mkdir -p {0}/benchmarks && cd {0}'.format(cvae_dir),
            # IBM WML CE conda environment
            'module load ibm-wml-ce',
            'env',
            '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'conda deactivate',
            'conda deactivate',
            'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2',
            'export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5"',
            # the actual training run
            'jsrun --erf_input /gpfs/alpine/med110/scratch/atrifan2/covid19/PLPro/entk_cvae_md_hvd/RANKFILE /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s' % (cvae_path, dim),
        ]

        # Placeholder executable: the work is done in pre_exec above
        task.executable = ['date']

        task.cpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        task.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }

        # Add the learn task to the learning stage
        learning_stage.add_tasks(task)

    return learning_stage
def InitCycle(
        self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps
):  # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first Cycle

    Replicas:       number of replicas (one MD task each)
    Replica_Cores:  cores passed to every AMBERTask
    md_executable:  MD executable passed to every AMBERTask
    ExchangeMethod: file uploaded as input of the exchange task
    timesteps:      forwarded to writeInputs.writeInputs

    Side effects: writes 'Input_Files.tar' in the current directory and
    removes the per-replica 'mdin_<r>' files, appends to
    self.md_task_list / self.ex_task_list / self.Book and emits profiler
    events. Returns the Pipeline.
    """

    # Initialize Pipeline
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()   # Bookkeeping
    tar_dict = dict()  # Bookkeeping

    ## Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=350,
                            min_temp=250,
                            replicas=Replicas,
                            timesteps=timesteps)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)

    # Create Tarball of input data; the context manager closes the archive
    # even if one of the input files is missing and tar.add raises
    with tarfile.open("Input_Files.tar", "w") as tar:
        for name in ["prmtop", "inpcrd", "mdin"]:
            tar.add(name)
        for r in range(Replicas):
            tar.add('mdin_{0}'.format(r))

    # delete all input files outside the tarball
    for r in range(Replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    # Create Untar Stage
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        'untar_input_files.py', 'Input_Files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1
    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    # Reference to the untar task; MD tasks link their inputs from it
    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
        p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not
    # built from a predetermined order
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  # Use for full temperature exchange
        ]
        md_tsk.arguments = [
            '-O',
            '-p', 'prmtop',
            '-i', 'mdin_{0}'.format(r),  # Use this for full Temperature Exchange
            '-c', 'inpcrd',
            '-o', 'out_{0}'.format(r),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, md_stg.name, md_tsk.name)

        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
    p.add_stages(md_stg)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create Exchange Task. Exchange task performs a Metropolis Hastings
    # thermodynamic balance condition check and spits out the
    # exchangePairs.dat file that contains a sorted list of ordered pairs.
    # Said pairs then exchange configurations by linking output
    # configuration files appropriately.
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']
    ex_stg.add_tasks(ex_tsk)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)

    self.Book.append(md_dict)
    return p
def generate_aggregating_stage():
    """
    Function to concatenate the MD trajectory (h5 contact map)

    Builds a stage 'S2.aggregating' with a single MPI task that runs
    traj_to_dset.py over all dcd files of the MD runs. All paths and
    parameters come from the module-level `cfg` dict; the rank count also
    depends on the module-level CUR_STAGE. Returns the Stage.
    """
    base_path = cfg['base_path']
    protein_pdb = '%s/Parameters/input_protein/prot.pdb' % base_path

    stage = Stage()
    stage.name = 'S2.aggregating'

    # Aggregation task
    task = Task()
    # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
    task.pre_exec = [
        '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
        'conda activate %s' % cfg['conda_pytorch'],
        'export LANG=en_US.utf-8',
        'export LC_ALL=en_US.utf-8',
        # The molecules script needs its input files in a single directory:
        # 1) find all (.dcd) files from openmm results
        # 2) create a temp directory
        # 3) symlink them in the temp directory
        'export dcd_list=(`ls %s/MD_exps/%s/omm_runs_*/*dcd`)' %
        (base_path, cfg['system_name']),
        'export tmp_path=`mktemp -p %s/MD_to_CVAE/ -d`' % base_path,
        'for dcd in ${dcd_list[@]}; do tmp=$(basename $(dirname $dcd)); ln -s $dcd $tmp_path/$tmp.dcd; done',
        'ln -s %s $tmp_path/prot.pdb' % cfg['pdb_file'],
        'ls ${tmp_path}',
        'unset CUDA_VISIBLE_DEVICES',
        'export OMP_NUM_THREADS=4',
    ]

    # - Each node takes 6 ranks
    # - each rank processes 2 files
    # - each iteration accumulates files to process
    ranks_by_nodes = cfg['node_counts'] * 6
    ranks_by_files = cfg['md_counts'] * max(1, CUR_STAGE) // 2
    cnt_constraint = min(ranks_by_nodes, ranks_by_files)

    task.executable = ['%s/bin/python' % (cfg['conda_pytorch'])]  # MD_to_CVAE.py
    task.arguments = [
        '%s/scripts/traj_to_dset.py' % cfg['molecules_path'],
        '-t', '$tmp_path',
        '-p', protein_pdb,
        '-r', protein_pdb,
        '-o', '%s/MD_to_CVAE/cvae_input.h5' % base_path,
        '--contact_maps_parameters',
        "kernel_type=threshold,threshold=%s" % cfg['cutoff'],
        '-s', cfg['selection'],
        '--rmsd',
        '--fnc',
        '--contact_map',
        '--point_cloud',
        '--num_workers', 2,
        '--distributed',
        '--verbose'
    ]

    task.cpu_reqs = {
        'processes': 1 * cnt_constraint,
        'process_type': "MPI",
        'threads_per_process': 6 * 4,
        'thread_type': 'OpenMP'
    }

    # Add the aggregation task to the aggregating stage
    stage.add_tasks(task)

    return stage
def get_pipeline(instance, iterations):
    """
    Build the pipeline of one run: an initial analysis stage followed by
    `iterations` generations of three stages each (grompp, mdrun, analysis).

    instance:   run identifier, used in the names of staged/downloaded files
    iterations: number of generations; they are numbered 1..iterations

    The first generation takes its run parameters from the initial analysis
    task and its coordinates from $SHARED; every later generation takes them
    from the analysis and mdrun tasks of the previous generation.
    Returns the Pipeline.
    """

    # Create a Pipeline object
    p = Pipeline()

    # Create Stage 1
    s1 = Stage()

    # Create a Task
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py', '--template', 'CB7G3_template.mdp', '--newname',
        'CB7G3_run.mdp', '--wldelta', '2', '--equilibrated', 'False',
        '--lambda_state', '0', '--seed',
        '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp', '$SHARED/analysis_1.py'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Create Stage 2
        s2 = Stage()

        # Create a Task
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp', '-c', 'CB7G3.gro', '-p', 'CB7G3.top', '-n',
            'CB7G3.ndx', '-o', 'CB7G3.tpr', '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx', '$SHARED/CB7G3.top', '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp', '$SHARED/cucurbit_7_uril_GMX.itp'
        ]

        # The loop starts at 1, so the first generation is `it == 1`. The
        # original tested `it == 0`, which never matched and made the first
        # generation read s3/s4/t3/t4 before they were ever assigned.
        if it == 1:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s1.uid, t1.uid), '$SHARED/CB7G3.gro'
            ]
        else:
            # s3/t3 and s4/t4 still refer to the previous generation here
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                (p.uid, s3.uid, t3.uid)
            ]

        # Add the Task to the Stage
        s2.add_tasks(t2)

        # Add Stage to the Pipeline
        p.add_stages(s2)

        # Create Stage 3
        s3 = Stage()

        # Create a Task
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt',
            20,
            '-deffnm',
            'CB7G3',
            '-dhdl',
            'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' %
            (p.uid, s2.uid, t2.uid)
        ]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]

        # Add the Task to the Stage
        s3.add_tasks(t3)

        # Add Stage to the Pipeline
        p.add_stages(s3)

        # Create Stage 4
        s4 = Stage()

        # Create a Task
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        # NOTE(review): no script name precedes the options although
        # analysis_2.py is linked below - confirm this is intentional.
        t4.arguments = [
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            # '--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it, instance),
            '--run={1}'.format(it, instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(
                it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(
                it, instance)
        ]

        # Add the Task to the Stage
        s4.add_tasks(t4)

        # Add Stage to the Pipeline
        p.add_stages(s4)

    return p
def test_task_to_dict():

    '''
    **Purpose**: Check that Task.to_dict() reports every expected attribute,
    first for an untouched Task and then for a Task with all settable
    attributes changed (with the executable given as a list and as a string)
    '''

    # --- a fresh task serializes to the defaults ---------------------------
    expected_defaults = {
        'uid'                  : 'task.0000',
        'name'                 : '',
        'state'                : states.INITIAL,
        'state_history'        : [states.INITIAL],
        'pre_exec'             : [],
        'executable'           : '',
        'arguments'            : [],
        'post_exec'            : [],
        'cpu_reqs'             : {'processes'          : 1,
                                  'process_type'       : None,
                                  'threads_per_process': 1,
                                  'thread_type'        : None},
        'gpu_reqs'             : {'processes'          : 0,
                                  'process_type'       : None,
                                  'threads_per_process': 0,
                                  'thread_type'        : None},
        'lfs_per_process'      : 0,
        'upload_input_data'    : [],
        'copy_input_data'      : [],
        'link_input_data'      : [],
        'link_output_data'     : [],
        'move_input_data'      : [],
        'copy_output_data'     : [],
        'move_output_data'     : [],
        'download_output_data' : [],
        'sandbox'              : '',
        'stdout'               : '',
        'stderr'               : '',
        'exit_code'            : None,
        'path'                 : None,
        'tag'                  : None,
        'parent_stage'         : {'uid': None, 'name': None},
        'parent_pipeline'      : {'uid': None, 'name': None},
    }

    fresh_task = Task()
    assert fresh_task.to_dict() == expected_defaults

    # --- a fully customized task -------------------------------------------
    t = Task()
    t.uid                             = 'test.0017'
    t.name                            = 'new'
    t.pre_exec                        = ['module load abc']
    t.executable                      = ['sleep']
    t.arguments                       = ['10']
    t.cpu_reqs['processes']           = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes']           = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process                 = 1024
    t.upload_input_data               = ['test1']
    t.copy_input_data                 = ['test2']
    t.link_input_data                 = ['test3']
    t.move_input_data                 = ['test4']
    t.copy_output_data                = ['test5']
    t.move_output_data                = ['test6']
    t.download_output_data            = ['test7']
    t.stdout                          = 'out'
    t.stderr                          = 'err'
    t.exit_code                       = 1
    t.path                            = 'a/b/c'
    t.tag                             = 'task.0010'
    t.parent_stage                    = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline                 = {'uid': 'p1', 'name': 'pipeline1'}

    # Everything not overridden here keeps its default value
    expected_custom = dict(expected_defaults)
    expected_custom.update({
        'uid'                  : 'test.0017',
        'name'                 : 'new',
        'pre_exec'             : ['module load abc'],
        'executable'           : 'sleep',
        'arguments'            : ['10'],
        'cpu_reqs'             : {'processes'          : 10,
                                  'process_type'       : None,
                                  'threads_per_process': 2,
                                  'thread_type'        : None},
        'gpu_reqs'             : {'processes'          : 5,
                                  'process_type'       : None,
                                  'threads_per_process': 3,
                                  'thread_type'        : None},
        'lfs_per_process'      : 1024,
        'upload_input_data'    : ['test1'],
        'copy_input_data'      : ['test2'],
        'link_input_data'      : ['test3'],
        'move_input_data'      : ['test4'],
        'copy_output_data'     : ['test5'],
        'move_output_data'     : ['test6'],
        'download_output_data' : ['test7'],
        'stdout'               : 'out',
        'stderr'               : 'err',
        'exit_code'            : 1,
        'path'                 : 'a/b/c',
        'tag'                  : 'task.0010',
        'parent_stage'         : {'uid': 's1', 'name': 'stage1'},
        'parent_pipeline'      : {'uid': 'p1', 'name': 'pipeline1'},
    })

    assert t.to_dict() == expected_custom

    # --- executable given as a plain string: same serialization ------------
    t.executable = 'sleep'
    assert t.to_dict() == expected_custom
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):
    """
    Build one Task from a task configuration and an experiment configuration.

    tcfg:       task configuration dict; the keys read here are 'pre_exec',
                'executable', 'script', 'options', 'cpu',
                'upload_input_data' and, optionally, 'copy_input_data' and
                'download_output_data'
    ecfg:       experiment configuration dict; the keys read here are
                'exp_dir', 'options' and, when the task options mention
                "input_data_file", 'input_data_file'
    pipe_name:  pipeline name, used for the PIPELINE_ID magic variable and
                in references to data of other tasks
    stage_name: not used by this function
    task_name:  name given to the task

    Returns the configured Task. `tcfg` is not modified.
    """

    # Initialize a task object
    t = Task()

    # Define magic variable dictionary
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # Pre exec let you load modules, set environment before executing the workload
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # Work on a copy of the upload list: the original appended to
    # tcfg['upload_input_data'] in place, so every further call with the
    # same configuration added the input file once more.
    upload_list = list(tcfg['upload_input_data'])

    # If there's a user-defined input file (likely for genmod modules), add
    # it to the upload file list
    if "input_data_file" in tcfg['options']:
        upload_list.append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task
    # NOTE(review): this sets `cpu_threads` with hyphenated keys, whereas
    # other code in this file configures tasks through `cpu_reqs` with
    # underscore keys - confirm this attribute is actually honoured.
    t.cpu_threads = {
        'processes': tcfg['cpu']['processes'],
        'process-type': tcfg['cpu']['process-type'],
        'threads-per-process': tcfg['cpu']['threads-per-process'],
        'thread-type': tcfg['cpu']['thread-type'],
    }

    # Upload data from your local machine to the remote machine
    # Note: Remote machine can be the local machine
    t.upload_input_data = upload_list

    # Copy data from other stages/tasks for use in this task
    copy_list = []
    if "copy_input_data" in tcfg:
        for copy_stage, stage_tasks in tcfg['copy_input_data'].items():
            for copy_task, file_names in stage_tasks.items():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend(
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in file_names)

    # Append the copy list (if any) to the task object
    t.copy_input_data = copy_list

    # Set the download data for the task
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg:
        download_list.extend(
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data'])

    # Append the download list to this task
    t.download_output_data = download_list

    # Return the task object
    return t
if __name__ == '__main__':

    # The pipeline that will hold every stage of this example
    p = Pipeline()

    # First stage, together with a record of the uids of its tasks
    s1 = Stage()
    s1_task_uids = []

    for cnt in range(10):

        # One echo task per iteration
        t = Task()
        t.executable = ['/bin/echo']
        t.arguments = ['I am task %s in %s' % (cnt, s1.name)]

        # Register the task with the stage, then remember its uid
        s1.add_tasks(t)
        s1_task_uids.append(t.uid)

    # The first stage is complete: attach it to the pipeline
    p.add_stages(s1)

    # Second stage
    s2 = Stage()
def test_task_exceptions(s, l, i, b):

    '''
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values

    `s`, `l`, `i` and `b` are sample values supplied by the caller (used
    here as a string, a list, an integer and a boolean). Each value is
    assigned to every Task attribute whose expected type it does not match,
    and the assignment must raise.
    '''

    t = Task()

    str_attrs = ['name', 'path', 'parent_stage', 'parent_pipeline',
                 'stdout', 'stderr']

    list_attrs = ['pre_exec', 'arguments', 'post_exec',
                  'upload_input_data', 'copy_input_data', 'link_input_data',
                  'move_input_data', 'copy_output_data',
                  'download_output_data', 'move_output_data']

    def reqs_with(key, value):
        # A valid requirements dict with exactly one entry replaced
        reqs = {'processes'          : 1,
                'process_type'       : None,
                'threads_per_process': 1,
                'thread_type'        : None}
        reqs[key] = value
        return reqs

    for data in [s, l, i, b]:

        # special case due to backward compatibility: the executable may be
        # a string or a list
        if not isinstance(data, (str, list)):
            with pytest.raises(ree.TypeError):
                t.executable = data

        if not isinstance(data, str):
            for attr in str_attrs:
                with pytest.raises(ree.TypeError):
                    setattr(t, attr, data)

        if not isinstance(data, list):
            for attr in list_attrs:
                with pytest.raises(ree.TypeError):
                    setattr(t, attr, data)

        if not isinstance(data, str):
            # The original placed all four assignments inside one
            # `pytest.raises` block, so only the first one ever ran. Each
            # assignment is now checked on its own.
            for reqs_attr in ['cpu_reqs', 'gpu_reqs']:
                for key in ['process_type', 'thread_type']:
                    with pytest.raises(ree.ValueError):
                        setattr(t, reqs_attr, reqs_with(key, data))

        if not isinstance(data, int):
            for reqs_attr in ['cpu_reqs', 'gpu_reqs']:
                for key in ['processes', 'threads_per_process']:
                    with pytest.raises(ree.TypeError):
                        setattr(t, reqs_attr, reqs_with(key, data))
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod):
    """Build the first replica-exchange cycle as a three-stage Pipeline.

    "Cycle" = 1 MD stage plus the subsequent exchange computation.  The
    pipeline consists of an untar stage, an MD stage with one task per
    replica, and an exchange stage with a single task.

    Arguments:
        Replicas       -- number of MD tasks to create (used with range())
        Replica_Cores  -- value assigned to each MD task's `cores`
        MD_Executable  -- executable of every MD task
        ExchangeMethod -- file uploaded as input of the exchange task

    Returns:
        The assembled Pipeline.

    Side effects:
        Prints the untar task reference and appends the per-replica MD task
        references to the module-level `Book`.
    """

    p = Pipeline()

    md_dict = dict()    # Bookkeeping: replica index -> MD task reference
    tar_dict = dict()   # Bookkeeping: 0 -> untar task reference

    # --- Untar stage: unpack the tarball of input data ---------------------
    untar_stg = Stage()

    untar_tsk = Task()
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py',
                                   '../../Input_Files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid,
                                                     untar_stg.uid,
                                                     untar_tsk.uid)
    # Parenthesised single-argument form works on Python 2 and 3 alike.
    print(tar_dict[0])

    # --- First MD stage ----------------------------------------------------
    # Defined separately since the workflow is not built from a
    # predetermined order.
    md_stg = Stage()

    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]
        md_tsk.link_input_data += ['%s/inpcrd' % tar_dict[0],
                                   '%s/prmtop' % tar_dict[0],
                                   '%s/mdin' % tar_dict[0]]
        # Should be abstracted from the user?
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        md_tsk.arguments = ['-O', '-p', 'prmtop', '-i', 'mdin',
                            '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid,
                                                        md_stg.uid,
                                                        md_tsk.uid)
        md_stg.add_tasks(md_tsk)

    p.add_stages(md_stg)

    # --- First exchange stage ----------------------------------------------
    # The exchange task performs a Metropolis Hastings thermodynamic balance
    # condition and writes exchangePairs.dat, a sorted list of ordered pairs.
    # Said pairs then exchange configurations by linking output
    # configuration files appropriately.
    ex_stg = Stage()

    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    p.add_stages(ex_stg)

    Book.append(md_dict)

    return p
# VM, set "RMQ_HOSTNAME" and "RMQ_PORT" in the session where you are running
# this script.
hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
# Environment values are always strings; convert so that `port` is an int
# whether it comes from the environment or from the default.
port = int(os.environ.get('RMQ_PORT', 5672))

if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = [
        '-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object
    s2 = Stage()
    s2.name = 'Stage 2'

    # Create a Task object
# Create file structure 'mkdir -p DATABASES_MPI', 'mkdir -p OUTPUT_FILES', # Copy data 'cp -r /projects/TROMP/entk/specfem3d_globe/DATA .', # Copy input files to output directory 'cp DATA/Par_file OUTPUT_FILES/', 'cp DATA/CMTSOLUTION OUTPUT_FILES/', 'cp DATA/STATIONS OUTPUT_FILES', # Link binaries 'ln -s /projects/TROMP/entk/specfem3d_globe/bin .', ] t1.executable = ['./bin/xmeshfem3D'] t1.cpu_reqs = { 'processes': 4, 'process_type': 'MPI', 'threads_per_process': 1, 'thread_type': 'OpenMP' } t1.post_exec = [ # Tar output files 'tar -zcf specfem_data.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES', # Copy to scratch folder 'cp specfem_data.tar.gz /projects/TROMP/entk/scratch/', ] t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_data.tar.gz']
p = Pipeline()

# Second stage to perform one specfem task
specfem_stage = Stage()

t1 = Task()

# Input: the archive is copied in from the scratch folder
t1.copy_input_data = ['/projects/TROMP/entk/scratch/specfem_data.tar.gz']

# Environment setup executed before the solver
t1.pre_exec = [
    # Modules to be loaded
    'module purge',
    'module load intel/18.0',
    'module load intel-mpi/intel/2018.3',
    # Untar the input data
    'tar -zxf specfem_data.tar.gz',
]

# The solver itself: 4 MPI processes, one OpenMP thread each
t1.executable = ['./bin/xspecfem3D']
t1.cpu_reqs = dict(processes=4,
                   process_type='MPI',
                   threads_per_process=1,
                   thread_type='OpenMP')

# Packaging executed after the solver
t1.post_exec = [
    # Tar output files
    'tar -zcf specfem_final.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES',
    # Copy to scratch folder
    'cp specfem_final.tar.gz /projects/TROMP/entk/scratch/',
]

# Files brought back from the task
t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_final.tar.gz']
def create_workflow(Kconfig, args):
    """Assemble the simulation/analysis Pipeline described by `Kconfig`.

    When the start iteration is 0, two pre-processing stages are added: one
    renames an existing output directory with a timestamp suffix, the other
    copies the configuration and scripts from $SHARED into the output
    directory.  After that, every iteration up to `Kconfig.num_iterations`
    contributes one simulation stage and, unless `Kconfig.strategy` is
    'extend', one analysis stage.

    Arguments:
        Kconfig -- configuration object; the attributes read here are
                   start_iter, remote_output_directory, NODESIZE,
                   num_replicas, num_iterations, trajstride, md_steps,
                   md_use_xml, md_run_file, md_input_file, md_reference and
                   strategy
        args    -- object whose `Kconfig` attribute names the config file

    Returns:
        The assembled Pipeline.

    Side effects:
        `Kconfig.start_iter` is updated to the next iteration (as a string)
        after each iteration is added.
    """

    wf = Pipeline()

    cur_iter = int(Kconfig.start_iter)
    # For a non-zero start iteration the files are assumed to already be in
    # combined_path.
    combined_path = str(Kconfig.remote_output_directory)
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path += '-giotto'

    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)

    # Environment setup shared by the simulation and analysis tasks.
    base_env = [
        'module swap PrgEnv-cray PrgEnv-gnu',
        'module load bwpy/1.2.5',
        'module add bwpy-mpi',
        'module add fftw/3.3.4.10',
        'module add cray-netcdf',
        'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
        'module add cmake/3.1.3',
        'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
    ]

    if cur_iter == 0:
        pre_proc_env = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]

        # Move a previous output directory out of the way.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = list(pre_proc_env)
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path,
            combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

        # Stage the config file and scripts into the output directory; the
        # `ls -l` executable only serves as a carrier for the copy.
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = list(pre_proc_env)
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path,
                                    args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                    Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path,
                                    Kconfig.md_reference)
        ]
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    while cur_iter < int(Kconfig.num_iterations):

        # ------------------------------------------------------------------
        # Simulation stage: the replicas are split into tasks of at most
        # `def_rep_per_thread` replicas each.
        sim_stage = Stage()

        # True division is required here: with integer operands the quotient
        # would be floored before ceil() sees it, which yields 0 replicas per
        # task (and an endless loop) when num_replicas < num_parallel.
        def_rep_per_thread = int(np.ceil(float(num_replicas) / num_parallel))
        num_allocated_rep = 0
        num_used_threads = 0

        while num_allocated_rep < num_replicas:
            if num_used_threads == num_parallel:
                print("ALLERT tried use more gpus than allocated")

            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            idx_end = num_allocated_rep + use_replicas

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = base_env + [
                'export tasks=md',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(idx_end),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'True',
                '>', 'md.log'
            ]

            link_arr = [
                '$SHARED/%s > run_openmm.py'
                % (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr += [
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml'
                ]
            sim_task.link_input_data = link_arr

            # Only the very first iteration needs the initial structure
            # copied in, once per replica handled by this task.
            copy_arr = []
            if cur_iter == 0:
                copy_arr = [
                    '$SHARED/%s > %s/iter0_input%s.pdb'
                    % (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(num_allocated_rep, idx_end)
                ]
            sim_task.copy_input_data = copy_arr

            # With the 'extend' strategy each output becomes the input of
            # the same replica in the next iteration.
            if str(Kconfig.strategy) == 'extend':
                sim_task.copy_output_data = [
                    '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb'
                    % (combined_path, cur_iter, idx,
                       combined_path, (cur_iter + 1), idx)
                    for idx in range(num_allocated_rep, idx_end)
                ]

            num_allocated_rep = idx_end
            # Count the slot so the over-allocation warning above can fire.
            num_used_threads += 1
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)

        # ------------------------------------------------------------------
        # Analysis stage: a single task running run-tica-msm.py, skipped for
        # the 'extend' strategy.
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = base_env + [
                'export tasks=tica_msm_ana',
                'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py',
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log'
                % (combined_path, cur_iter)
            ]

            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def generate_pipeline():
    """Build a Pipeline that keeps growing through stage post-exec hooks.

    The pipeline starts with one stage of ten `sleep 30` tasks.  Every such
    stage carries a post-exec dictionary: while the module-level counter
    CUR_NEW_STAGE does not exceed MAX_NEW_STAGE, another identical stage is
    appended; otherwise 'Done' is printed.

    Returns:
        The Pipeline holding the initial stage.
    """

    def func_condition():
        # Reading module-level names needs no `global` declaration.
        return CUR_NEW_STAGE <= MAX_NEW_STAGE

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        p.add_stages(make_sleep_stage())

    def func_on_false():
        # Call form is valid on both Python 2 and Python 3.
        print('Done')

    def make_sleep_stage():
        # One stage of ten sleep tasks, wired to the callbacks above.
        stage = Stage()
        for _ in range(10):
            task = Task()
            task.executable = ['sleep']
            task.arguments = ['30']
            stage.add_tasks(task)

        stage.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }
        return stage

    # `p` is bound before any callback can run, so the closure in
    # func_on_true resolves it at call time.
    p = Pipeline()
    p.add_stages(make_sleep_stage())

    return p
for cnt in range(10):

    p = Pipeline()

    # Fill the pipeline with 12 single-task stages
    for cnt2 in range(12):

        s = Stage()
        t = Task()

        # Even loop indices are labelled 'a', odd ones 'b'; the number in
        # the name is one-based
        t.name = '%s %s' % ('a' if cnt2 % 2 == 0 else 'b', cnt2 + 1)

        # Each task runs stress-ng with the arguments below
        t.executable = '$HOME/tem/stress-ng'
        t.arguments = ['-c', '1', '-t', '100']

        # Task -> Stage -> Pipeline
        s.add_tasks(t)
        p.add_stages(s)

    Pipelines.append(p)

# Create Application Manager
appman = AppManager(hostname=hostname,
                    port=port,
                    autoterminate=False,
                    username=username,
                    password=password)

# Create a dictionary describe four mandatory keys:
def generate_pipeline():
    """Return a three-stage Pipeline named 'p1'.

    Stage 's1' has one task that writes a 1 MB 'output.txt'.  Stage 's2'
    has 30 tasks that each copy that file and write a character count to
    'ccount.txt'.  Stage 's3' has 30 tasks that each copy the 'ccount.txt'
    of the same-indexed 's2' task, write its SHA-1 to 'chksum.txt' and
    download it as 'chksum_<index>.txt'.
    """

    pipeline = Pipeline()
    pipeline.name = 'p1'

    # --- Stage 1: generate the data file -----------------------------------
    first_stage = Stage()
    first_stage.name = 's1'

    producer = Task()
    producer.name = 't1'
    producer.executable = '/bin/bash'
    producer.arguments = [
        '-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt'
    ]

    first_stage.add_tasks(producer)
    pipeline.add_stages(first_stage)

    # --- Stage 2: character count tasks ------------------------------------
    count_stage = Stage()
    count_stage.name = 's2'
    count_task_names = []

    for index in range(30):
        counter = Task()
        counter.name = 't%s' % (index + 1)
        counter.executable = '/bin/bash'
        counter.arguments = [
            '-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt'
        ]
        # Pull output.txt from the stage-1 task, referenced by name
        counter.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt'
            % (pipeline.name, first_stage.name, producer.name)
        ]

        count_stage.add_tasks(counter)
        count_task_names.append(counter.name)

    pipeline.add_stages(count_stage)

    # --- Stage 3: checksum tasks -------------------------------------------
    checksum_stage = Stage()
    checksum_stage.name = 's3'

    for index, source_name in zip(range(30), count_task_names):
        summer = Task()
        summer.name = 't%s' % (index + 1)
        summer.executable = '/bin/bash'
        summer.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
        # Pull ccount.txt from the stage-2 task with the same index
        summer.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/ccount.txt'
            % (pipeline.name, count_stage.name, source_name)
        ]
        # Bring the checksum back under an index-specific name
        summer.download_output_data = ['chksum.txt > chksum_%s.txt' % index]

        checksum_stage.add_tasks(summer)

    pipeline.add_stages(checksum_stage)

    return pipeline
def create_workflow(Kconfig, args):
    """Assemble a Pipeline that runs at most one further iteration.

    The iteration to run is derived from the files already present: the
    highest N for which at least `num_replicas` files match
    '<combined_path>/iterN_input*.pdb' (0 if none).  For iteration 0 a
    pre-processing stage first copies the config file and scripts from
    $SHARED into the output directory.  The iteration itself is one stage
    holding the simulation tasks and, unless `Kconfig.strategy` is
    'extend', one analysis task.

    Arguments:
        Kconfig -- configuration object; the attributes read here are
                   start_iter, remote_output_directory, NODESIZE,
                   GPUs_per_NODE, num_replicas, num_iterations, script_ana,
                   md_env, env_ana_same, ana_env, md_use_xml, md_run_file,
                   md_input_file, md_reference, strategy and, optionally,
                   systemxml and integratorxml
        args    -- object whose `Kconfig` attribute is the config file path

    Returns:
        The assembled Pipeline.

    Side effects:
        Prints progress information and updates `Kconfig.start_iter` to the
        next iteration (as a string) once the iteration has been added.
    """

    wf = Pipeline()

    cur_iter = int(Kconfig.start_iter)
    # For a non-zero iteration the files are assumed to already be in
    # combined_path.
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    # Only the file name of the config path is used on the remote side.
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]

    # Optional settings: fall back to the default names only when the
    # attribute is absent instead of hiding every possible error.
    try:
        systemxml = str(Kconfig.systemxml)
    except AttributeError:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except AttributeError:
        integratorxml = 'integrator-5.xml'

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # Find the first iteration that lacks a full set of input files; the one
    # before it is the iteration to (re)run.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb'
                        % (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    if cur_iter == 0:
        # The `ls -l` executable only serves as a carrier for the copy.
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']

        copy_arr = [
            '$SHARED/%s > %s/%s' % (config_file, combined_path, config_file),
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                    Kconfig.md_run_file)
        ]
        if Kconfig.md_run_file != Kconfig.md_reference:
            copy_arr = copy_arr + [
                '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path,
                                        Kconfig.md_reference)
            ]
        if str(Kconfig.strategy) != 'extend':
            copy_arr = copy_arr + [
                '$SHARED/%s > %s/%s' % (script_ana, combined_path,
                                        script_ana)
            ]
        print("copy_arr", copy_arr)
        pre_proc_task2.copy_input_data = copy_arr

        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    start_iter = cur_iter
    print("finished prep")

    # The second condition limits the loop to a single iteration per call.
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        sim_stage = Stage()

        num_allocated_rep = 0
        num_used_parallel = 0

        while num_allocated_rep < num_replicas:
            # Spread the replicas still unassigned evenly over the slots
            # still unused.
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep)
                    / float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            idx_end = num_allocated_rep + use_replicas
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--Kconfig', config_file,
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(idx_end),
                '--path', combined_path,
                '>', 'md.log'
            ]

            link_arr = [
                '$SHARED/%s > run_openmm.py'
                % (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr += [
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml)
                ]
            link_arr.append('$SHARED/%s > %s' % (config_file, config_file))
            sim_task.link_input_data = link_arr

            # Only iteration 0 needs the initial structure copied in, once
            # per replica handled by this task.
            copy_arr = []
            if cur_iter == 0:
                copy_arr = [
                    '$SHARED/%s > %s/iter0_input%s.pdb'
                    % (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(num_allocated_rep, idx_end)
                ]
            sim_task.copy_input_data = copy_arr
            sim_task.copy_output_data = []

            num_allocated_rep = idx_end
            num_used_parallel = num_used_parallel + 1
            sim_stage.add_tasks(sim_task)

        # The analysis task is added to the same stage as the simulations.
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                ana_task.pre_exec = ana_settings
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 20,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log'
                    % (combined_path, cur_iter, anatask)
                ]
                sim_stage.add_tasks(ana_task)

        wf.add_stages(sim_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
# VM, set "RMQ_HOSTNAME" and "RMQ_PORT" in the session where you are running
# this script.
hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
# Environment values are always strings; convert so that `port` is an int
# whether it comes from the environment or from the default.
port = int(os.environ.get('RMQ_PORT', 5672))

if __name__ == '__main__':

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = ['/bin/bash']
    t1.arguments = ['-l', '-c',
                    'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object
    s2 = Stage()
    s2.name = 'Stage 2'

    # Create a Task object
    t2 = Task()