def test_dict_to_task():

    # make sure the type checks kick in
    d = {'name': 1}
    with pytest.raises(ree.TypeError):
        Task(from_dict=d)

    d = {'name'      : 'foo',
         'pre_exec'  : ['bar'],
         'executable': 'buz',
         'arguments' : ['baz', 'fiz'],
         'cpu_reqs'  : {'processes'          : 1,
                        'process_type'       : None,
                        'threads_per_process': 1,
                        'thread_type'        : None},
         'gpu_reqs'  : {'processes'          : 0,
                        'process_type'       : None,
                        'threads_per_process': 0,
                        'thread_type'        : None}}

    t = Task(from_dict=d)

    for k, v in d.items():
        assert t.__getattribute__(k) == v, \
               '%s != %s' % (t.__getattribute__(k), v)
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s' % s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'            # Assign a name to the task (optional)
            t.executable = ['/bin/echo']  # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s' % (t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
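# Driver sketch: a pipeline from generate_pipeline() only runs once handed to
# an AppManager, mirroring the test_amgr_run_mock snippet further down. This
# assumes a RabbitMQ instance reachable at the module-level hostname/port;
# the resource values are illustrative, not a tested configuration.

from radical.entk import AppManager

def run_generated_pipeline():

    amgr = AppManager(hostname=hostname, port=port)
    amgr.resource_desc = {'resource': 'local.localhost',
                          'walltime': 10,
                          'cpus'    : 1}
    amgr.workflow = [generate_pipeline(name='Pipeline 1', stages=2)]
    amgr.run()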
def test_wfp_initialization(s, i, b, l):

    p = Pipeline()
    st = Stage()
    t = Task()
    t.executable = ['/bin/date']
    st.add_tasks(t)
    p.add_stages(st)

    wfp = WFprocessor(sid='rp.session.local.0000',
                      workflow=set([p]),
                      pending_queue=['pending'],
                      completed_queue=['completed'],
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=True)

    assert len(wfp._uid.split('.')) == 2
    assert 'wfprocessor' == wfp._uid.split('.')[0]
    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._mq_hostname == hostname
    assert wfp._port == port
    assert wfp._wfp_process is None
    assert wfp._workflow == set([p])

    if not isinstance(s, unicode):
        wfp = WFprocessor(sid=s,
                          workflow=set([p]),
                          pending_queue=l,
                          completed_queue=l,
                          mq_hostname=s,
                          port=i,
                          resubmit_failed=b)
def test_wfp_start_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    assert wfp.start_processor()
    assert not wfp._enqueue_thread
    assert not wfp._dequeue_thread
    assert not wfp._enqueue_thread_terminate.is_set()
    assert not wfp._dequeue_thread_terminate.is_set()
    assert not wfp._wfp_terminate.is_set()
    assert wfp._wfp_process.is_alive()

    wfp._wfp_terminate.set()
    wfp._wfp_process.join()
def test_issue_239():

    t = Task()

    t.cpu_reqs = {'processes': 1}
    assert t.cpu_reqs == {'processes'          : 1,
                          'thread_type'        : None,
                          'threads_per_process': 1,
                          'process_type'       : None}

    t.cpu_reqs = {'threads_per_process': 1}
    assert t.cpu_reqs == {'processes'          : 1,
                          'thread_type'        : None,
                          'threads_per_process': 1,
                          'process_type'       : None}

    t.gpu_reqs = {'processes': 1}
    assert t.gpu_reqs == {'processes'          : 1,
                          'thread_type'        : None,
                          'threads_per_process': 1,
                          'process_type'       : None}

    t.gpu_reqs = {'threads_per_process': 1}
    assert t.gpu_reqs == {'processes'          : 1,
                          'thread_type'        : None,
                          'threads_per_process': 1,
                          'process_type'       : None}
def generate_aggregating_stage():
    """
    Function to concatenate the MD trajectory (h5 contact map)
    """

    s2 = Stage()
    s2.name = 'aggregating'

    # Aggregation task
    t2 = Task()

    # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
    t2.pre_exec  = []
    t2.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
    t2.pre_exec += ['conda activate rp.copy']
    t2.pre_exec += ['cd /gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE']

    t2.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python']

    # MD_to_CVAE.py
    t2.arguments = ['/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py',
                    '-f',
                    '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_exps/fs-pep']

    # Add the aggregation task to the aggregating stage
    s2.add_tasks(t2)

    return s2
def test_wfp_initialization(s, b, l):

    p = Pipeline()
    stage = Stage()
    t = Task()
    t.executable = '/bin/date'
    stage.add_tasks(t)
    p.add_stages(stage)

    rmq_conn_params = pika.ConnectionParameters(host=hostname, port=port)

    wfp = WFprocessor(sid='rp.session.local.0000',
                      workflow=set([p]),
                      pending_queue=['pending'],
                      completed_queue=['completed'],
                      rmq_conn_params=rmq_conn_params,
                      resubmit_failed=True)

    assert len(wfp._uid.split('.')) == 2
    assert 'wfprocessor' == wfp._uid.split('.')[0]
    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._rmq_conn_params == rmq_conn_params
    assert wfp._wfp_process is None
    assert wfp._workflow == set([p])

    if not isinstance(s, str):
        wfp = WFprocessor(sid=s,
                          workflow=set([p]),
                          pending_queue=l,
                          completed_queue=l,
                          rmq_conn_params=rmq_conn_params,
                          resubmit_failed=b)
def test_pipeline_decrement_stage():

    p = Pipeline()

    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t

    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t

    p.add_stages([s1, s2])

    p._increment_stage()
    p._increment_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 2
    assert p._completed_flag.is_set() is True

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p._completed_flag.is_set() is False

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 0
    assert p._completed_flag.is_set() is False
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'
    s1_task_uids = []

    for cnt in range(8):

        # Create a Task object
        t1 = Task()
        t1.name = 't%s' % (cnt + 1)

        # To make a python script executable:
        # 1) add a "shebang" as the first line: #!/usr/bin/env python
        # 2) chmod +x MultiprocessingCode.py
        # The executable always has to be on the target machine
        t1.executable = '~/MultiprocessingCode.py'

        # Add the Task to the Stage
        s1.add_tasks(t1)
        s1_task_uids.append(t1.name)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_amgr_synchronizer():

    amgr = Amgr(hostname=host, port=port, username=username, password=password)
    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 10 tasks to the stage
    for cnt in range(10):
        t = Task()
        t.executable = 'some-executable-%s' % cnt
        s.add_tasks(t)

    p.add_stages(s)
    p._validate()

    amgr.workflow = [p]

    sid  = 'test.0016'
    rmgr = BaseRmgr({}, sid, None, {})
    tmgr = BaseTmgr(sid=sid,
                    pending_queue=['pending-1'],
                    completed_queue=['completed-1'],
                    rmgr=rmgr,
                    rmq_conn_params=amgr._rmq_conn_params,
                    rts=None)

    amgr._rmgr = rmgr
    rmgr._task_manager = tmgr

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = mt.Event()
    sync_thread = mt.Thread(target=amgr._synchronizer,
                            name='synchronizer-thread')
    sync_thread.start()

    # Start the function to be synchronized in a process
    proc = mp.Process(target=func_for_synchronizer_test, name='temp-proc',
                      args=(amgr._sid, p, tmgr))
    proc.start()
    proc.join()

    # Wait for AppManager to finish the message exchange:
    # no need to set *_terminate_sync*, use a timeout instead
    # amgr._terminate_sync.set()
    sync_thread.join(15)

    for t in p.stages[0].tasks:
        assert t.state == states.COMPLETED
def _task(self, pipeline_id, model_id, time_stamp):

    # Specify training hyperparameters
    # Select latent dimension for CVAE [3, ..., self.num_ml]
    latent_dim = 3 + model_id
    epochs     = 100
    batch_size = 512

    cvae_dir     = f'{self.prefix}/data/ml/pipeline-{pipeline_id}'
    cm_data_path = f'{self.prefix}/data/preproc/pipeline-{pipeline_id}/cvae-input.h5'

    task = Task()

    self.load_environment(task)
    self.set_python_executable(task)
    self.assign_hardware(task)

    # Create output directory for generated files.
    task.pre_exec.extend([f'mkdir -p {cvae_dir}'])

    # Specify python ML task with arguments
    task.arguments = [f'{self.prefix}/examples/cvae_dbscan/scripts/cvae.py',
                      '--input', cm_data_path,
                      '--out', cvae_dir,
                      '--model_id', f'{model_id}',
                      '--epochs', f'{epochs}',
                      '--batch_size', f'{batch_size}',
                      '--latent_dim', f'{latent_dim}']

    return task
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s' % s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'           # Assign a name to the task (optional)
            t.executable = '/bin/echo'   # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s' % (t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
def tasks(self, pipeline_id):
    """
    Returns
    -------
    Set of tasks to be added to the preprocessing stage.
    """
    md_dir      = f'{self.prefix}/data/md/pipeline-{pipeline_id}'
    preproc_dir = f'{self.prefix}/data/preproc/pipeline-{pipeline_id}'

    task = Task()

    self.load_environment(task)
    self.set_python_executable(task)
    self.assign_hardware(task)

    # Create output directory for generated files.
    task.pre_exec.extend([f'mkdir -p {preproc_dir}'])

    # Specify python preprocessing task with arguments
    task.arguments = [f'{self.prefix}/examples/cvae_dbscan/scripts/contact_map.py',
                      '--sim_path', md_dir,
                      '--out', preproc_dir]

    return {task}
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      rmq_conn_params=amgr._rmq_conn_params,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_sync_with_master(self, mocked_init, mocked_Logger, mocked_Profiler):

    # --------------------------------------------------------------------------
    #
    def component_execution(packets, conn_params, queue):

        tmgr = BaseTmgr(None, None, None, None, None, None)
        tmgr._log  = mocked_Logger
        tmgr._prof = mocked_Profiler

        mq_connection2 = pika.BlockingConnection(rmq_conn_params)
        mq_channel2 = mq_connection2.channel()

        for obj_type, obj in packets:
            tmgr._sync_with_master(obj, obj_type, mq_channel2,
                                   conn_params, queue)

        if mq_channel2.is_open:
            mq_channel2.close()

    task = Task()
    task.parent_stage = {'uid': 'stage.0000', 'name': 'stage.0000'}
    packets = [('Task', task)]

    stage = Stage()
    stage.parent_pipeline = {'uid': 'pipe.0000', 'name': 'pipe.0000'}
    packets.append(('Stage', stage))

    hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
    port     = int(os.environ.get('RMQ_PORT', '5672'))
    username = os.environ.get('RMQ_USERNAME', 'guest')
    password = os.environ.get('RMQ_PASSWORD', 'guest')

    credentials = pika.PlainCredentials(username, password)
    rmq_conn_params = pika.ConnectionParameters(host=hostname, port=port,
                                                credentials=credentials)

    mq_connection = pika.BlockingConnection(rmq_conn_params)
    mq_channel = mq_connection.channel()
    mq_channel.queue_declare(queue='master')

    master_thread = mt.Thread(target=component_execution, name='tmgr_sync',
                              args=(packets, rmq_conn_params, 'master'))
    master_thread.start()

    time.sleep(1)

    try:
        while packets:
            packet = packets.pop(0)
            _, _, body = mq_channel.basic_get(queue='master')
            msg = json.loads(body)
            self.assertEqual(msg['object'], packet[1].to_dict())
            self.assertEqual(msg['type'], packet[0])
    except Exception as ex:
        print(ex)
        print(json.loads(body))
        master_thread.join()
        mq_channel.queue_delete(queue='master')
        mq_channel.close()
        mq_connection.close()
        raise ex
    else:
        master_thread.join()
        mq_channel.queue_delete(queue='master')
        mq_channel.close()
        mq_connection.close()
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/date']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def test_wfp_workflow_incomplete():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert wfp.workflow_incomplete()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def test_wfp_dequeue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.initialize_workflow()

    assert p.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    p.state = states.SCHEDULED
    p.stages[0].state = states.SCHEDULING

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s' % amgr._completed_queue[0],
                             body=task_as_dict)

    wfp.start_processor()

    th = mt.Thread(target=func_for_dequeue_test, name='temp-proc', args=(p,))
    th.start()
    th.join()

    wfp.terminate_processor()

    assert p.state == states.DONE
    assert p.stages[0].state == states.DONE

    for t in p.stages[0].tasks:
        assert t.state == states.DONE
def constructTask(url):

    # Fetch the task description from the given URL
    response = requests.get(url)
    response = response.json()

    t = Task()
    t.name = str(response['name'])
    t.executable = [str(response['executable'])]
    t.arguments = [str(response['arguments'])]

    return t
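# Endpoint contract assumed by constructTask() above: a flat JSON object with
# 'name', 'executable', and 'arguments' keys. The payload and URL below are
# hypothetical, purely to illustrate the expected shape:
#
#     {"name": "echo-task", "executable": "/bin/echo", "arguments": "hello"}
#
#     t = constructTask('http://localhost:8000/task')   # hypothetical URL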
def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=mq_hostname, port=port))
    mq_channel = mq_connection.channel()

    tasks = list()
    for _ in range(16):
        task = Task()
        task.state = states.SCHEDULING
        task.executable = '/bin/echo'
        tasks.append(task.to_dict())

    tasks_as_json = json.dumps(tasks)
    mq_channel.basic_publish(exchange='',
                             routing_key=pending_queue,
                             body=tasks_as_json)

    cnt = 0
    while cnt < 15:

        method_frame, props, body = mq_channel.basic_get(queue=completed_queue)
        if not body:
            continue

        task = Task()
        task.from_dict(json.loads(body))
        if task.state == states.DONE:
            cnt += 1

        mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)

    mq_connection.close()
def generate_pipeline():

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            func_on_true()

        func_on_false()

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        s = Stage()

        for i in range(10):
            t = Task()
            t.executable = '/bin/sleep'
            t.arguments = ['30']
            s.add_tasks(t)

        # Add post-exec to the Stage
        s.post_exec = func_condition

        p.add_stages(s)

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):
        t1 = Task()
        t1.executable = 'sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = func_condition

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_amgr_synchronizer():

    logger   = ru.get_logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr     = Amgr(hostname=hostname, port=port)

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=hostname, port=port))
    mq_channel = mq_connection.channel()

    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):
        t = Task()
        t.executable = ['some-executable-%s' % cnt]
        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Start the function to be synchronized in a process
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))
    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING

    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
def generate(self, fldr_name):

    tasks = []
    for i in range(self.n_samples):
        t = Task()
        t.executable = self.model
        t.arguments = [fldr_name, i, self.select_file]
        tasks.append(t)

    return tasks
def test_stage_check_complete():

    s = Stage()

    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    assert s._check_stage_complete() is False
    s._set_tasks_state(states.DONE)
    assert s._check_stage_complete() is True
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_stage_task_assignment():
    """
    ***Purpose***: Test if necessary attributes are automatically updated upon
    task assignment
    """

    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.tasks = t

    assert type(s.tasks) == set
    assert s._task_count == 1
    assert t in s.tasks
def test_stage_task_assignment():
    """
    ***Purpose***: Test if necessary attributes are automatically updated upon
    task assignment
    """

    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t

    assert type(s.tasks) == set
    assert s._task_count == 1
    assert t in s.tasks
def test_amgr_synchronizer():

    logger   = ru.Logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr     = Amgr(hostname=hostname, port=port)

    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):
        t = Task()
        t.executable = ['some-executable-%s' % cnt]
        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Start the function to be synchronized in a process
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))
    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING

    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
def test_task_validate():

    t = Task()
    t._state = 'test'
    with pytest.raises(ValueError):
        t._validate()

    t = Task()
    with pytest.raises(MissingError):
        t._validate()
def esmacs(self, rct_stage, stage, outdir="equilibration", name=None):

    for i in range(1, 13):

        t = Task()
        t.pre_exec = [
            "export WDIR=\"{}/{}\"".format(self.run_dir, name),
            ". {}".format(self.conda_init),
            "conda activate {}".format(self.esmacs_tenv),
            "module load {}".format(self.esmacs_tmodules),
            "mkdir -p $WDIR/replicas/rep{}/{}".format(i, outdir),
            "cd $WDIR/replicas/rep{}/{}".format(i, outdir),
            "rm -f {}.log {}.xml {}.dcd {}.chk".format(stage, stage,
                                                       stage, stage),
            "export OMP_NUM_THREADS=1"
        ]

        # t.executable = '/ccs/home/litan/miniconda3/envs/wf3/bin/python3.7'
        t.executable = 'python3'
        t.arguments = ['$WDIR/{}.py'.format(stage)]
        t.post_exec = []

        t.cpu_reqs = {'processes'          : 1,
                      'process_type'       : None,
                      'threads_per_process': 4,
                      'thread_type'        : 'OpenMP'}
        t.gpu_reqs = {'processes'          : 1,
                      'process_type'       : None,
                      'threads_per_process': 1,
                      'thread_type'        : 'CUDA'}

        getattr(self, rct_stage).add_tasks(t)
        print(getattr(self, rct_stage).to_dict())
def describe_MD_stages():

    # Docking stage
    s1 = Stage()
    s1.name = 'Docking.%d' % CUR_NEW_STAGE

    # Docking task
    t1 = Task()
    t1.executable = ['sleep']
    t1.arguments = ['3']

    # Add the Docking task to the Docking Stage
    s1.add_tasks(t1)

    # MD stage
    s2 = Stage()
    s2.name = 'Simulation.%d' % CUR_NEW_STAGE

    # Each Task() is an OpenMM executable that will run on a single GPU.
    # Set sleep time for local testing
    for i in range(6):
        t2 = Task()
        t2.executable = ['sleep']
        t2.arguments = ['5']

        # Add the MD task to the MD Stage
        s2.add_tasks(t2)

    # Add post-exec to the Stage
    s2.post_exec = func_condition

    return [s1, s2]
def test_task_assign_uid():

    t = Task()
    try:
        home   = os.environ.get('HOME', '/home')
        folder = glob.glob('%s/.radical/utils/test*' % home)
        for f in folder:
            shutil.rmtree(f)
    except:
        pass

    t._assign_uid('test')
    assert t.uid == 'task.0000'
def test_stage_task_addition():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks(set([t1, t2]))

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks
def generate_task(self, **ensembles):
    """
    Generate a `radical.entk` task.

    Parameters
    ----------
    ensembles: dict, OrderedDict
        Dictionary of the *current* values of variables that are ensembles.
        All the variables that were declared with `add_ensemble` should be
        specified here so that a correct task object can be generated.
    """
    for k, w in ensembles.items():
        setattr(self, k, w)

    if not self.all_variables_defined():
        raise ValueError('Some variables are not defined!')

    task = Task()
    task.name = ensembles['task_name']

    task.pre_exec   += self.engine.pre_exec
    task.executable += str(self.engine.executable)
    task.arguments  += self.engine.arguments

    task.cpu_reqs = {'processes'          : self._processes,
                     'process_type'       : 'MPI' if self.engine.uses_mpi else None,
                     'threads_per_process': self._threads_per_process,
                     'thread_type'        : None}
    task.gpu_reqs = {'processes'          : self._gpu_processes,
                     'process_type'       : 'MPI' if self.engine.gpu_uses_mpi else None,
                     'threads_per_process': self._gpu_threads_per_process,
                     'thread_type'        : None}

    task.arguments.extend(self.arguments)
    task.copy_input_data.extend(self.copied_files)
    task.copy_input_data.extend(self.system.copied_files)
    task.post_exec.append('echo "{}" > sim_desc.txt'.format(task.name))

    task.link_input_data.extend(self.input_data(**ensembles))
    task.link_input_data.extend(self.system.linked_files)

    task.pre_exec.extend(self._sed.format(n, v, f)
                         for f, vs in self.get_variables().items()
                         for n, v in vs)

    return task
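# Hedged usage sketch for generate_task(): every name below is an illustrative
# assumption, not part of the source. It presumes a wrapper object `sim` whose
# engine was configured elsewhere and whose ensemble variable 'temperature'
# was declared via add_ensemble(); 'task_name' is always required, since
# generate_task() reads it from the ensembles dict.
#
#     task = sim.generate_task(task_name='sim.t300', temperature=300)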
def test_wfp_enqueue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_enqueue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULED

    assert p.stages[0].state == states.SCHEDULED
    assert p.state == states.SCHEDULING
def generate_pipeline():

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        for t in p.stages[CUR_NEW_STAGE].tasks:
            dur = randint(10, 30)
            t.arguments = [str(dur)]

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for _ in range(CUR_TASKS):
            t1 = Task()
            t1.executable = ['sleep']
            t1.arguments = ['30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
            'condition': func_condition,
            'on_true'  : func_on_true,
            'on_false' : func_on_false
        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def generate_pipeline():

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        shuffle(p.stages[CUR_NEW_STAGE:])

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):
            t1 = Task()
            t1.executable = '/bin/sleep'
            t1.arguments = ['30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
            'condition': func_condition,
            'on_true'  : func_on_true,
            'on_false' : func_on_false
        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def create_inversion_dict_stage(cmt_file_db, param_path, task_counter):
    """Creates stage for the creation of the inversion files. This stage is
    tiny, but required before the actual inversion.

    :param cmt_file_db:
    :param param_path:
    :param task_counter:
    :return:
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inv_dict_func = os.path.join(bin_path, "write_inversion_dicts.py")

    # Create Process Paths Stage (CPP)
    # Create a Stage object
    inv_dict_stage = Stage()
    inv_dict_stage.name = "Creating"

    # Create Task
    inv_dict_task = Task()

    # This way the task gets the name of the path file
    inv_dict_task.name = "Inversion-Dictionaries"

    # Conda activate
    inv_dict_task.pre_exec = [DB_params["conda-activate"]]

    # Assign executable to the task
    inv_dict_task.executable = [DB_params["bin-python"]]

    inv_dict_task.arguments = [inv_dict_func,
                               "-f", cmt_file_db,
                               "-p", param_path]

    # In the future maybe to database dir as a total log?
    inv_dict_task.stdout = os.path.join(
        "%s" % Cdir, "logs",
        "stdout.pipeline_%s.task_%s.%s" % (Cid, str(task_counter).zfill(4),
                                           inv_dict_task.name))
    inv_dict_task.stderr = os.path.join(
        "%s" % Cdir, "logs",
        "stderr.pipeline_%s.task_%s.%s" % (Cid, str(task_counter).zfill(4),
                                           inv_dict_task.name))

    inv_dict_stage.add_tasks(inv_dict_task)

    task_counter += 1

    return inv_dict_stage, task_counter
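# Chaining sketch: stage factories in this style return both the stage and the
# advanced task counter, so a caller would thread the counter through roughly
# as below. The two paths are placeholders, not real inputs.

def build_inversion_pipeline():

    p = Pipeline()
    task_counter = 0

    stage, task_counter = create_inversion_dict_stage(
        cmt_file_db='path/to/cmt_file_db',   # placeholder
        param_path='path/to/params',         # placeholder
        task_counter=task_counter)

    p.add_stages(stage)
    return p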
def test_stage_set_tasks_state():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    with pytest.raises(ValueError):
        s._set_tasks_state(2)

    s._set_tasks_state(states.DONE)
    assert t1.state == states.DONE
    assert t2.state == states.DONE
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['sleep']
    t1.arguments = ['10']

    s.add_tasks(t1)
    p.add_stages(s)

    return p
def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=mq_hostname, port=port))
    mq_channel = mq_connection.channel()

    tasks = list()
    for _ in range(16):
        t = Task()
        t.state = states.SCHEDULING
        t.executable = '/bin/echo'
        tasks.append(t.to_dict())

    tasks_as_json = json.dumps(tasks)
    mq_channel.basic_publish(exchange='',
                             routing_key=pending_queue,
                             body=tasks_as_json)

    cnt = 0
    while cnt < 15:

        method_frame, props, body = mq_channel.basic_get(queue=completed_queue)
        if body:
            task = Task()
            task.from_dict(json.loads(body))
            if task.state == states.DONE:
                cnt += 1
            mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)

    mq_connection.close()
def test_pipeline_stage_addition():

    p = Pipeline()

    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t

    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t

    p.add_stages([s1, s2])

    assert type(p.stages) == list
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p.stages[0] == s1
    assert p.stages[1] == s2
def generate_pipeline():

    def func_condition():
        p.suspend()
        print('Suspending pipeline %s for 10 seconds' % p.uid)
        sleep(10)
        return True

    def func_on_true():
        print('Resuming pipeline %s' % p.uid)
        p.resume()

    def func_on_false():
        pass

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):
        t1 = Task()
        t1.executable = '/bin/sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = {
        'condition': func_condition,
        'on_true'  : func_on_true,
        'on_false' : func_on_false
    }

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object that sleeps for 5 minutes
    t1 = Task()
    t1.executable = ['/bin/sleep']
    t1.arguments = ['300']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object that uploads the local file 'temp' and moves it to /tmp/
    t1 = Task()
    t1.executable = ['mv']
    t1.arguments = ['temp', '/tmp/']
    t1.upload_input_data = ['%s/temp' % cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_wfp_initialize_workflow():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    wfp = WFprocessor(sid='test',
                      workflow=[p],
                      pending_queue=list(),
                      completed_queue=list(),
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert p.uid is not None
    assert p.stages[0].uid is not None
    for t in p.stages[0].tasks:
        assert t.uid is not None
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object that deliberately fails (/bin/false)
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/false']
    # t1.arguments = ['"Hello World"', '>>', 'temp.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    for _ in range(4096):
        t1 = Task()
        t1.executable = ['/bin/echo']
        t1.arguments = ['"Hello World"']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create 10 Task objects, each concatenating two shared input files
    # into 'output.txt'
    for x in range(10):
        t1 = Task()
        t1.executable = 'cat'
        t1.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        t1.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        t1.download_output_data = ['output.txt > %s/output_%s.txt'
                                   % (cur_dir, x + 1)]

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
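# Staging sketch: '$SHARED' in copy_input_data above resolves only if the
# files were first staged into the session's shared area, typically via the
# AppManager.shared_data attribute. This assumes file1.txt and file2.txt exist
# in the current directory and hostname/port as elsewhere in these snippets;
# the resource values are illustrative.

from radical.entk import AppManager

def run_shared_data_pipeline():

    amgr = AppManager(hostname=hostname, port=port)
    amgr.shared_data = ['file1.txt', 'file2.txt']
    amgr.resource_desc = {'resource': 'local.localhost',
                          'walltime': 10,
                          'cpus'    : 1}
    amgr.workflow = [generate_pipeline()]
    amgr.run()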
def func_on_true():

    global CUR_NEW_STAGE
    CUR_NEW_STAGE += 1

    s = Stage()

    for i in range(10):
        t = Task()
        t.executable = '/bin/sleep'
        t.arguments = ['30']
        s.add_tasks(t)

    # Add post-exec to the Stage
    s.post_exec = {
        'condition': func_condition,
        'on_true'  : func_on_true,
        'on_false' : func_on_false
    }

    p.add_stages(s)
def test_amgr_run_mock():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.name = 'simulation'
    t.executable = ['/bin/date']
    s.tasks = t
    p.add_stages(s)

    res_dict = {'resource': 'local.localhost',
                'walltime': 5,
                'cpus'    : 1,
                'project' : ''}

    appman = Amgr(hostname=hostname, port=port, rts="mock")
    appman.resource_desc = res_dict

    appman.workflow = [p]
    appman.run()
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    s.add_tasks(t1)
    p.add_stages(s)

    return p