def generate_pipeline():
    """Build a three-stage pipeline whose stages hand files down the chain.

    Stage 1 writes the first 1,000,000 bytes of base64-encoded random data to
    ``output.txt``; stage 2 counts character occurrences into ``ccount.txt``;
    stage 3 writes the SHA-1 of that file to ``chksum.txt``, which is
    downloaded as ``chksum_<cnt>.txt``.

    NOTE(review): ``cnt`` is not defined inside this function, so it must be
    a module-level name by the time the function is called.
    NOTE(review): the staging placeholder is spelled ``$Pipline_`` here;
    confirm this is the spelling the workflow library actually resolves.

    Returns:
        The populated ``Pipeline``.
    """
    pipeline = Pipeline()

    def _append_bash_stage(command, staged_inputs=None, downloads=None):
        # One stage holding a single login-shell bash task that runs `command`.
        stage = Stage()
        task = Task()
        task.executable = '/bin/bash'
        task.arguments = ['-l', '-c', command]
        if staged_inputs is not None:
            task.copy_input_data = staged_inputs
        if downloads is not None:
            task.download_output_data = downloads
        stage.add_tasks(task)
        pipeline.add_stages(stage)
        return stage, task

    # Stage 1: produce the raw data file.
    gen_stage, gen_task = _append_bash_stage(
        'base64 /dev/urandom | head -c 1000000 > output.txt')

    # Stage 2: character histogram of the stage-1 output.
    count_stage, count_task = _append_bash_stage(
        'grep -o . output.txt | sort | uniq -c > ccount.txt',
        staged_inputs=['$Pipline_%s_Stage_%s_Task_%s/output.txt'
                       % (pipeline.uid, gen_stage.uid, gen_task.uid)])

    # Stage 3: checksum of the histogram, downloaded to the client side.
    _append_bash_stage(
        'sha1sum ccount.txt > chksum.txt',
        staged_inputs=['$Pipline_%s_Stage_%s_Task_%s/ccount.txt'
                       % (pipeline.uid, count_stage.uid, count_task.uid)],
        downloads=['chksum.txt > chksum_%s.txt' % cnt])

    return pipeline
def generate_pipeline(nid):
    """Build pipeline ``p<nid>`` with a one-task stage and a ten-task stage.

    Stage ``s1`` holds task ``t1`` (``echo hello``).  Stage ``s2`` holds ten
    ``echo world`` tasks named ``t1`` .. ``t10``; each stages in
    ``output.txt`` from the first-stage task, addressed by pipeline, stage
    and task *names*.

    Args:
        nid: Value interpolated into the pipeline name.

    Returns:
        The populated ``Pipeline``.
    """
    pipeline = Pipeline()
    first_stage = Stage()
    second_stage = Stage()
    hello_task = Task()

    pipeline.name = 'p%s' % nid
    first_stage.name = 's1'
    second_stage.name = 's2'
    hello_task.name = 't1'

    hello_task.executable = '/bin/echo'
    hello_task.arguments = ['hello']
    first_stage.add_tasks(hello_task)
    pipeline.add_stages(first_stage)

    for index in range(1, 11):
        world_task = Task()
        world_task.name = 't%s' % index
        world_task.executable = '/bin/echo'
        world_task.arguments = ['world']
        # Stage in the file from the first-stage task's location.
        world_task.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt'
            % (pipeline.name, first_stage.name, hello_task.name)
        ]
        second_stage.add_tasks(world_task)

    pipeline.add_stages(second_stage)
    return pipeline
def generate_pipeline():
    """Build a single-stage pipeline of ten identical ``cat`` tasks.

    Each task stages in ``file1.txt`` and ``file2.txt`` from ``$SHARED``,
    is given the arguments ``file1.txt file2.txt > output.txt`` and has its
    ``output.txt`` downloaded to ``<cur_dir>/output_<n>.txt`` (n = 1..10).

    NOTE(review): ``cur_dir`` is read from module scope.

    Returns:
        The populated ``Pipeline``.
    """
    pipeline = Pipeline()
    stage = Stage()

    for number in range(1, 11):
        task = Task()
        task.executable = 'cat'
        task.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        task.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        task.download_output_data = [
            'output.txt > %s/output_%s.txt' % (cur_dir, number)
        ]
        stage.add_tasks(task)

    pipeline.add_stages(stage)
    return pipeline
def create_single_task():
    """Return a Task named ``simulation`` that runs ``/bin/date``.

    The executable is given as a plain string; both copy-staging lists are
    explicitly set to empty.
    """
    task = Task()
    task.name = 'simulation'
    task.executable = '/bin/date'
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def create_single_task():
    """Return a Task named ``simulation`` that runs ``/bin/date``.

    The executable is given as a one-element list; both copy-staging lists
    are explicitly set to empty.
    """
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/date']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def create_single_task():
    """Return a Task named ``simulation`` that runs ``/bin/echo hello``.

    Both copy-staging lists are explicitly set to empty.
    """
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/echo']
    task.arguments = ['hello']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def create_single_task():
    """Return a Task named ``dummy_task`` with a placeholder executable.

    The executable is ``placeholder`` with arguments ``a b c``; both
    copy-staging lists are explicitly set to empty.
    """
    task = Task()
    task.name = 'dummy_task'
    task.executable = ['placeholder']
    task.arguments = ['a', 'b', 'c']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def foo(value):
    """Return a ``simulation`` Task configured for ``gmx mdrun``.

    Args:
        value: Accepted but not used by this function.

    Returns:
        A Task whose environment loads the gromacs module, with arguments
        ``a b c`` and empty copy-staging lists.
    """
    task = Task(name='simulation')
    task.environment = ['module load gromacs']
    task.executable = ['gmx mdrun']
    task.arguments = ['a', 'b', 'c']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def create_single_task():
    """Return a Task named ``simulation`` that runs ``gmx mdrun a b c``.

    Both copy-staging lists are explicitly set to empty.
    """
    task = Task()
    task.name = 'simulation'
    task.executable = ['gmx mdrun']
    task.arguments = ['a', 'b', 'c']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []
    return task
def get_pipeline(tasks):
    """Build a two-stage pipeline: one ``grompp`` task, then ``tasks`` ``mdrun`` tasks.

    Executables and arguments come from the module-level ``app_coll``
    mapping; the per-task core count of the second stage comes from the
    module-level ``num_cores``.

    Args:
        tasks: Number of ``mdrun`` tasks to create in the second stage.

    Returns:
        The populated ``Pipeline``.
    """
    pipeline = Pipeline()

    # Stage 1: a single preprocessing task fed from the shared staging area.
    prep_stage = Stage()
    prep_task = Task()
    prep_task.pre_exec = ['module load gromacs/5.0/INTEL-140-MVAPICH2-2.0']
    prep_task.executable = app_coll['grompp']['executable']
    prep_task.arguments = app_coll['grompp']['arguments']
    prep_task.cores = app_coll['grompp']['cores']
    prep_task.link_input_data = [
        '$SHARED/grompp.mdp > grompp.mdp',
        '$SHARED/input.gro > input.gro',
        '$SHARED/topol.top > topol.top',
    ]
    prep_stage.add_tasks(prep_task)
    pipeline.add_stages(prep_stage)

    # Stage 2: `tasks` identical runs, each consuming stage 1's topol.tpr.
    run_stage = Stage()
    for _ in range(tasks):
        run_task = Task()
        run_task.pre_exec = [
            'module load gromacs/5.0/INTEL-140-MVAPICH2-2.0',
            'export OMP_NUM_THREADS=%s' % num_cores,
        ]
        run_task.executable = app_coll['mdrun']['executable']
        run_task.arguments = app_coll['mdrun']['arguments']
        # Core count follows num_cores, not app_coll['mdrun']['cores'].
        run_task.cores = num_cores
        run_task.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/topol.tpr'
            % (pipeline.uid, prep_stage.uid, prep_task.uid)
        ]
        run_stage.add_tasks(run_task)
    pipeline.add_stages(run_stage)

    return pipeline
def generate_pipeline(nid):
    """Build pipeline ``p<nid>``: one ``echo hello`` task, then ten ``echo world`` tasks.

    Stage ``s1`` holds a single task (named ``t2``).  Stage ``s2`` holds ten
    tasks named ``t1`` .. ``t10``; each stages in ``output.txt`` from the
    first-stage task, addressed by pipeline, stage and task *names*.

    NOTE(review): the first-stage task only echoes to stdout; nothing
    visible here creates ``output.txt`` -- confirm the staging directive is
    intended.

    Args:
        nid: Value interpolated into the pipeline name.

    Returns:
        The populated ``Pipeline``.
    """
    p = Pipeline()
    p.name = 'p%s' % nid

    # First stage: a single echo task.
    s1 = Stage()
    s1.name = 's1'

    t1 = Task()
    t1.name = 't2'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']

    s1.add_tasks(t1)
    p.add_stages(s1)

    # Second stage: ten echo tasks that each reference the first-stage task.
    s2 = Stage()
    s2.name = 's2'

    for cnt in range(10):
        t2 = Task()
        t2.name = 't%s' % (cnt + 1)
        t2.executable = ['/bin/echo']
        t2.arguments = ['world']
        # Stage in data from the first-stage task's location.
        t2.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt'
            % (p.name, s1.name, t1.name)
        ]
        s2.add_tasks(t2)

    p.add_stages(s2)
    return p
def create_pipeline():
    """Return a Pipeline with one Stage holding one ``echo hello`` Task.

    The task is named ``simulation`` and has empty copy-staging lists.
    """
    pipeline = Pipeline()
    stage = Stage()

    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/echo']
    task.arguments = ['hello']
    # No data staging in either direction.
    task.copy_input_data = []
    task.copy_output_data = []

    stage.add_tasks(task)
    pipeline.add_stages(stage)
    return pipeline
def create_pipeline():
    """Return a one-stage, one-task Pipeline that runs ``/bin/echo hello``.

    The task is named ``simulation`` and has empty copy-staging lists.
    """
    workflow = Pipeline()
    only_stage = Stage()

    echo_task = Task()
    echo_task.name = 'simulation'
    echo_task.executable = ['/bin/echo']
    echo_task.arguments = ['hello']
    # No data staging in either direction.
    echo_task.copy_input_data = []
    echo_task.copy_output_data = []

    only_stage.add_tasks(echo_task)
    workflow.add_stages(only_stage)
    return workflow
def test_assignment_exceptions():
    """Check that Task attributes reject values of the wrong type.

    ``name`` must raise ``TypeError`` for any non-string sample, and every
    list-valued attribute must raise ``TypeError`` for any non-list sample.
    """
    task = Task()
    samples = [1, 'a', True, list()]

    # Attributes that only accept lists, in the order they are probed.
    list_only_attrs = (
        'pre_exec',
        'executable',
        'arguments',
        'post_exec',
        'upload_input_data',
        'copy_input_data',
        'link_input_data',
        'copy_output_data',
        'download_output_data',
    )

    for sample in samples:
        if not isinstance(sample, str):
            with pytest.raises(TypeError):
                task.name = sample

        if not isinstance(sample, list):
            for attr in list_only_attrs:
                with pytest.raises(TypeError):
                    setattr(task, attr, sample)
def generate_pipeline():
    """Create a pipeline with one stage containing ten ``cat`` tasks.

    Every task copies ``file1.txt`` and ``file2.txt`` from ``$SHARED``,
    receives the arguments ``file1.txt file2.txt > output.txt`` and
    downloads its ``output.txt`` as ``<cur_dir>/output_<n>.txt`` with
    n running from 1 to 10.

    NOTE(review): ``cur_dir`` is read from module scope.

    Returns:
        The populated ``Pipeline``.
    """
    workflow = Pipeline()
    concat_stage = Stage()

    for slot in range(10):
        concat_task = Task()
        concat_task.executable = 'cat'
        concat_task.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        concat_task.copy_input_data = ['$SHARED/file1.txt',
                                       '$SHARED/file2.txt']
        concat_task.download_output_data = [
            'output.txt > %s/output_%s.txt' % (cur_dir, slot + 1)
        ]
        concat_stage.add_tasks(concat_task)

    workflow.add_stages(concat_stage)
    return workflow
def func_on_true():
    """Append the next grompp -> mdrun -> analysis round to pipeline ``p``.

    Three single-task stages are created for the current generation
    (``cur_iter[instance]``).  Each stage is recorded in
    ``book[p.name]['stages']`` right after it is built, so the next stage
    can reference its predecessor's task by name.  The analysis stage
    carries a ``post_exec`` hook that points back at this function.

    Reads module-level ``p``, ``instance``, ``cur_iter``, ``book``,
    ``GMX_PATH``, ``ALCH_ANA_PATH``, ``func_condition`` and
    ``func_on_false``.
    """
    global cur_iter, book

    generation = cur_iter[instance]

    def _recorded(offset):
        # (pipeline, stage, task) names of a previously recorded stage.
        entry = book[p.name]['stages'][offset]
        return (p.name, entry['name'], entry['task'])

    def _record(stage, task):
        book[p.name]['stages'].append({'name': stage.name, 'task': task.name})

    def _fill(templates):
        # Templates use {0} for the generation and {1} for the instance.
        return [template.format(generation, instance)
                for template in templates]

    # ---- Stage 2: gmx grompp ------------------------------------------
    grompp_stage = Stage()
    grompp_stage.name = 'iter%s-s2' % generation

    grompp_task = Task()
    grompp_task.name = 'iter%s-s2-t2' % generation
    grompp_task.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    grompp_task.executable = ['gmx grompp']
    grompp_task.arguments = [
        '-f', 'CB7G3_run.mdp',
        '-c', 'CB7G3.gro',
        '-p', 'CB7G3.top',
        '-n', 'CB7G3.ndx',
        '-o', 'CB7G3.tpr',
        '-maxwarn', '10',
    ]
    grompp_task.cores = 1

    grompp_inputs = [
        '$SHARED/CB7G3.ndx',
        '$SHARED/CB7G3.top',
        '$SHARED/3atomtypes.itp',
        '$SHARED/3_GMX.itp',
        '$SHARED/cucurbit_7_uril_GMX.itp',
        '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % _recorded(-1),
    ]
    if generation == 1:
        # First generation: start from the shared structure file.
        grompp_inputs.append('$SHARED/CB7G3.gro')
    else:
        # Later generations: take the structure from two recorded stages back.
        grompp_inputs.append(
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' % _recorded(-2))
    grompp_task.copy_input_data = grompp_inputs

    grompp_stage.add_tasks(grompp_task)
    _record(grompp_stage, grompp_task)
    p.add_stages(grompp_stage)

    # ---- Stage 3: gmx mdrun -------------------------------------------
    mdrun_stage = Stage()
    mdrun_stage.name = 'iter%s-s3' % generation

    mdrun_task = Task()
    mdrun_task.name = 'iter%s-s3-t3' % generation
    mdrun_task.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    mdrun_task.executable = ['gmx mdrun']
    mdrun_task.arguments = [
        '-nt', 20,
        '-deffnm', 'CB7G3',
        '-dhdl', 'CB7G3_dhdl.xvg',
    ]
    mdrun_task.cores = 20
    # The most recent book entry is now the grompp stage recorded above.
    mdrun_task.copy_input_data = [
        '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % _recorded(-1)
    ]
    mdrun_task.copy_output_data = _fill([
        'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg',
        'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg',
        'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg',
        'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log',
    ])
    mdrun_task.download_output_data = _fill([
        'CB7G3.xtc > ./output/CB7G3_run{1}_gen{0}.xtc',
        'CB7G3.log > ./output/CB7G3_run{1}_gen{0}.log',
        'CB7G3_dhdl.xvg > ./output/CB7G3_run{1}_gen{0}_dhdl.xvg',
        'CB7G3_pullf.xvg > ./output/CB7G3_run{1}_gen{0}_pullf.xvg',
        'CB7G3_pullx.xvg > ./output/CB7G3_run{1}_gen{0}_pullx.xvg',
        'CB7G3.gro > ./output/CB7G3_run{1}_gen{0}.gro',
    ])

    mdrun_stage.add_tasks(mdrun_task)
    _record(mdrun_stage, mdrun_task)
    p.add_stages(mdrun_stage)

    # ---- Stage 4: analysis --------------------------------------------
    analysis_stage = Stage()
    analysis_stage.name = 'iter%s-s4' % generation

    analysis_task = Task()
    analysis_task.name = 'iter%s-s4-t4' % generation
    analysis_task.pre_exec = [
        'module load python/2.7.7-anaconda',
        'export PYTHONPATH=%s/alchemical_analysis:$PYTHONPATH' % ALCH_ANA_PATH,
        'export PYTHONPATH=%s:$PYTHONPATH' % ALCH_ANA_PATH,
        'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
        'ln -s ../staging_area data',
    ]
    analysis_task.executable = ['python']
    analysis_task.arguments = [
        'analysis_2.py',
        '--newname=CB7G3_run.mdp',
        '--template=CB7G3_template.mdp',
        '--dir=./data',
    ] + _fill(['--gen={0}', '--run={1}'])
    analysis_task.cores = 1
    analysis_task.copy_input_data = [
        '$SHARED/analysis_2.py',
        '$SHARED/alchemical_analysis.py',
        '$SHARED/CB7G3_template.mdp',
    ]
    analysis_task.download_output_data = _fill([
        'analyze_1/results.txt > ./output/results_run{1}_gen{0}.txt',
        'STDOUT > ./output/stdout_run{1}_gen{0}',
        'STDERR > ./output/stderr_run{1}_gen{0}',
        'CB7G3_run.mdp > ./output/CB7G3_run{1}_gen{0}.mdp',
        'results_average.txt > ./output/results_average_run{1}_gen{0}.txt',
    ])

    # After this stage the condition decides which callback runs next.
    analysis_stage.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false,
    }

    analysis_stage.add_tasks(analysis_task)
    _record(analysis_stage, analysis_task)
    p.add_stages(analysis_stage)

    print(book)
t.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t.cores = 32 stg.add_tasks(t) task_uids['Stage_%s'%N_Stg].append(t.uid) p.add_stages(stg) stage_uids.append(stg.uid) else: for n0 in range(Replicas): t = Task() t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'] #MD Engine t.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/out.gro > in.gro'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.top'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/FNF.itp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/martini_v2.2.itp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.mdp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0])] t.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t.cores = 32 stg.add_tasks(t) task_uids['Stage_%s'%N_Stg].append(t.uid) p.add_stages(stg) stage_uids.append(stg.uid) print p.stages print p.stages[1].tasks # Create a dictionary describe four mandatory keys: # resource, walltime, cores and project # resource is 'local.localhost' to execute locally
else: for n0 in range(Replicas): t = Task() t.executable = [ '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d' ] #MD Engine t.copy_input_data = [ '$Pipeline_%s_Stage_%s_Task_%s/out.gro > in.gro' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.top' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/FNF.itp' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/martini_v2.2.itp' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.mdp' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]) ] t.pre_exec = [ 'module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top' ] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t.cores = 32 stg.add_tasks(t)
def _make_pre_proc_stage(Kconfig, args, combined_path, cur_iter):
    """Return the stage run before iteration 0: one ``mv`` task plus file staging."""
    stage = Stage()
    task = Task()
    task.pre_exec = [
        'export tasks=pre_proc_task',
        'export iter=%s' % cur_iter,
        'export OMP_NUM_THREADS=1',
    ]
    # Rename combined_path to a name carrying a minute-resolution timestamp.
    task.executable = ['mv']
    task.arguments = [
        combined_path,
        combined_path + time.strftime("%Y-%m-%d-%H-%M"),
    ]
    task.copy_input_data = [
        '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
        '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
        '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                Kconfig.md_run_file),
    ]
    stage.add_tasks(task)
    return stage


def _make_sim_stage(Kconfig, combined_path, cur_iter, num_replicas,
                    num_parallel):
    """Return the simulation stage: replicas split into consecutive index ranges, one task per range."""
    stage = Stage()

    # Upper bound on the number of replicas handled by a single task.
    reps_per_task = int(num_replicas / num_parallel) + 1
    first_rep = 0
    tasks_used = 0

    while first_rep < num_replicas:
        if tasks_used == num_parallel:
            print("ALLERT tried use more gpus than allocated")

        rep_count = min(reps_per_task, num_replicas - first_rep)
        rep_indices = range(first_rep, first_rep + rep_count)

        task = Task()
        task.executable = ['python']
        task.pre_exec = [
            'module unload PrgEnv-cray',
            'module load PrgEnv-gnu',
            'module unload bwpy',
            'module load bwpy',
            'module add bwpy-mpi',
            'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake',
            'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic',
            'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=md',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1',
        ]
        # One GPU process per task; no CPU processes are requested.
        task.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None,
        }
        task.cpu_reqs = {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None,
        }
        task.arguments = [
            'run_openmm.py',
            '--trajstride', '10',
            '--idxstart', str(first_rep),
            '--idxend', str(first_rep + rep_count),
            '--path', combined_path,
            '--iter', str(cur_iter),
            '--md_steps', str(Kconfig.md_steps),
            '--save_traj', 'True',
            '>', 'md.log',
        ]
        task.link_input_data = [
            '$SHARED/%s > run_openmm.py'
            % (os.path.basename(Kconfig.md_run_file))
        ]

        # Only the very first iteration seeds each replica's input structure.
        if cur_iter == 0:
            task.copy_input_data = [
                '$SHARED/%s > %s/iter0_input%s.pdb'
                % (Kconfig.md_input_file, combined_path, idx)
                for idx in rep_indices
            ]
        else:
            task.copy_input_data = []

        # 'extend' strategy: each output becomes the next iteration's input.
        if str(Kconfig.strategy) == 'extend':
            task.copy_output_data = [
                '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb'
                % (combined_path, cur_iter, idx,
                   combined_path, (cur_iter + 1), idx)
                for idx in rep_indices
            ]

        first_rep += rep_count
        stage.add_tasks(task)
        # Count the task so the allocation check at the loop top is live.
        tasks_used += 1

    return stage


def _make_analysis_stage(args, combined_path, cur_iter, num_replicas):
    """Return the stage holding the single ``run-tica-msm.py`` analysis task."""
    stage = Stage()
    task = Task()
    task.pre_exec = [
        'module unload PrgEnv-cray',
        'module load PrgEnv-gnu',
        'module unload bwpy',
        'module load bwpy/0.3.0',
        'module add bwpy-mpi',
        'module add fftw',
        'module add cray-netcdf',
        'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
        'module add cmake',
        'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
        'export tasks=tica_msm_ana',
        'export PYEMMA_NJOBS=1',
        'export iter=%s' % cur_iter,
        'export OMP_NUM_THREADS=1',
    ]
    task.executable = ['python']
    task.arguments = [
        'run-tica-msm.py',
        '--path', combined_path,
        '--n_select', str(num_replicas),
        '--cur_iter', str(cur_iter),
        '--Kconfig', str(args.Kconfig),
        '>', 'analyse.log',
    ]
    task.cpu_reqs = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 1,
        'thread_type': None,
    }
    task.link_input_data = [
        '$SHARED/run-tica-msm.py > run-tica-msm.py',
        '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig),
    ]
    task.copy_output_data = [
        'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
    ]
    stage.add_tasks(task)
    return stage


def create_workflow(Kconfig, args):
    """Build the iterative simulation/analysis pipeline described by ``Kconfig``.

    Starting at ``Kconfig.start_iter`` and running while the iteration is
    below ``Kconfig.num_iterations``, each iteration appends a simulation
    stage and, unless ``Kconfig.strategy`` is ``'extend'``, an analysis
    stage.  When starting from iteration 0 a pre-processing stage is added
    first.  ``Kconfig.start_iter`` is advanced (as a string) after every
    iteration that is appended.

    Args:
        Kconfig: Configuration object; the attributes read are
            ``start_iter``, ``num_iterations``, ``remote_output_directory``,
            ``NODESIZE``, ``num_replicas``, ``md_run_file``,
            ``md_input_file``, ``md_steps`` and ``strategy``.
        args: Object whose ``Kconfig`` attribute is the configuration file
            name used in staging directives.

    Returns:
        The populated ``Pipeline``.
    """
    wf = Pipeline()

    cur_iter = int(Kconfig.start_iter)

    # For a non-zero start iteration the files are assumed to already be in
    # combined_path.
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)

    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)

    if cur_iter == 0:
        wf.add_stages(
            _make_pre_proc_stage(Kconfig, args, combined_path, cur_iter))

    while cur_iter < int(Kconfig.num_iterations):
        wf.add_stages(
            _make_sim_stage(Kconfig, combined_path, cur_iter,
                            num_replicas, num_parallel))

        if str(Kconfig.strategy) != 'extend':
            wf.add_stages(
                _make_analysis_stage(args, combined_path, cur_iter,
                                     num_replicas))

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def test_task_to_dict():
    """
    **Purpose**: Check that 'to_dict' of the Task class reports every expected
    attribute, first for an untouched Task and then for a fully populated one
    """

    def _reqs(processes, threads):
        # Resource-requirement dict with both type fields left unset.
        return {
            'processes': processes,
            'process_type': None,
            'threads_per_process': threads,
            'thread_type': None
        }

    # --- freshly constructed Task --------------------------------------
    expected_fresh = {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': _reqs(1, 1),
        'gpu_reqs': _reqs(0, 0),
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {'uid': None, 'name': None},
        'parent_pipeline': {'uid': None, 'name': None}
    }
    assert Task().to_dict() == expected_fresh

    # --- Task with every attribute assigned ----------------------------
    task = Task()
    for attr, value in [('uid', 'test.0000'),
                        ('name', 'new'),
                        ('pre_exec', ['module load abc']),
                        ('executable', ['sleep']),
                        ('arguments', ['10'])]:
        setattr(task, attr, value)

    task.cpu_reqs['processes'] = 10
    task.cpu_reqs['threads_per_process'] = 2
    task.gpu_reqs['processes'] = 5
    task.gpu_reqs['threads_per_process'] = 3

    for attr, value in [('lfs_per_process', 1024),
                        ('upload_input_data', ['test1']),
                        ('copy_input_data', ['test2']),
                        ('link_input_data', ['test3']),
                        ('move_input_data', ['test4']),
                        ('copy_output_data', ['test5']),
                        ('move_output_data', ['test6']),
                        ('download_output_data', ['test7']),
                        ('stdout', 'out'),
                        ('stderr', 'err'),
                        ('exit_code', 1),
                        ('path', 'a/b/c'),
                        ('tag', 'task.0010'),
                        ('parent_stage', {'uid': 's1', 'name': 'stage1'}),
                        ('parent_pipeline', {'uid': 'p1',
                                             'name': 'pipeline1'})]:
        setattr(task, attr, value)

    expected_populated = {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': _reqs(10, 2),
        'gpu_reqs': _reqs(5, 3),
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {'uid': 's1', 'name': 'stage1'},
        'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}
    }
    assert task.to_dict() == expected_populated
for i in range(num_workers): w = Thread(target=worker, name='worker', args=(worker_queue[i%num_queues],i)) w.start() workers.append(w) t = Task() t.arguments = ["--template=PLCpep7_template.mdp", "--newname=PLCpep7_run.mdp", "--wldelta=100", "--equilibrated=False", "--lambda_state=0", "--seed=1"] t.cores = 20 t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr'] t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc', 'PLCpep7.log > PLCpep7_run1_gen0.log', 'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg', 'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg', 'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg', 'PLCpep7.gro > PLCpep7_run1_gen0.gro' ] t = json.dumps(t.to_dict()) msg_num = 0 start = time.time() while msg_num < num_tasks: #message = 'message_%s'%msg_num
# Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object s2 = Stage() s2.name = 'Stage 2' # Create a Task object t2 = Task() t2.executable = '/bin/bash' t2.arguments = [ '-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt' ] # Copy data from the task in the first stage to the current task's location t2.copy_input_data = [ '$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid) ] # Download the output of the current task to the current location t2.download_output_data = ['ccount.txt'] # Add the Task to the Stage s2.add_tasks(t2) # Add Stage to the Pipeline p.add_stages(s2) # Create Application Manager appman = AppManager(hostname=hostname, port=port) # Assign the workflow as a set or list of Pipelines to the Application Manager appman.workflow = set([p])
# Add the Task to the Stage s1.add_tasks(t1) # Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object s2 = Stage() s2.name = 'Stage 2' # Create a Task object t2 = Task() t2.executable = ['/bin/bash'] t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt'] # Copy data from the task in the first stage to the current task's location t2.copy_input_data = ['$Pipline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)] # Download the output of the current task to the current location t2.download_output_data = ['ccount.txt'] # Add the Task to the Stage s2.add_tasks(t2) # Add Stage to the Pipeline p.add_stages(s2) # Create Application Manager appman = AppManager(hostname=hostname, port=port) # Assign the workflow as a set or list of Pipelines to the Application Manager appman.workflow = set([p])
def test_task_exceptions(s,l,i,b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values

    Every invalid assignment gets its own ``pytest.raises`` block. Statements
    placed after the first raising statement inside a shared block are never
    executed, so grouping several assignments in one block leaves all but
    the first of them untested.

    :param s: sample string value
    :param l: sample list value
    :param i: sample integer value
    :param b: sample boolean value
    """

    t = Task()

    # Attributes that must reject anything that is not a string.
    str_attrs = ('name', 'path', 'parent_stage', 'parent_pipeline',
                 'stdout', 'stderr')

    # Attributes that must reject anything that is not a list.
    list_attrs = ('pre_exec', 'arguments', 'post_exec',
                  'upload_input_data', 'copy_input_data', 'link_input_data',
                  'move_input_data', 'copy_output_data',
                  'download_output_data', 'move_output_data')

    def _reqs(key, value):
        # A valid requirements dict with exactly one field overridden.
        reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        }
        reqs[key] = value
        return reqs

    for data in [s,l,i,b]:

        if not isinstance(data,str):
            for attr in str_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        if not isinstance(data,list):
            for attr in list_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        if not isinstance(data, str) and not isinstance(data, list):
            with pytest.raises(TypeError):
                t.executable = data

        # Non-string process/thread types must be rejected with ValueError.
        if not isinstance(data, str) and not isinstance(data, unicode):
            for attr in ('cpu_reqs', 'gpu_reqs'):
                for key in ('process_type', 'thread_type'):
                    with pytest.raises(ValueError):
                        setattr(t, attr, _reqs(key, data))

        # Non-integer process/thread counts must be rejected with TypeError.
        if not isinstance(data, int):
            for attr in ('cpu_reqs', 'gpu_reqs'):
                for key in ('processes', 'threads_per_process'):
                    with pytest.raises(TypeError):
                        setattr(t, attr, _reqs(key, data))
ExchangePairs = [] with open('exchangePairs.txt', "rb") as file ### read file into list, ### use list ot populate data staging placeholders for i in file.readlines(): tmp = i.split(" ") try: ExchangePairs.append((int(tmp[0]), int(tmp[1]))) except:pass ###AND THEN, define the task for n0 in range(Replicas): t = Task() t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'] #MD Engine t.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/restrt > inpcrd'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/prmtop'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/mdin'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0])] t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] t.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out'] t.cores = Replica_Cores stg.add_tasks(t) task_uids['Stage_%s'%N_Stg].append(t.uid) p.add_stages(stg) stage_uids.append(stg.uid) # Create a dictionary describe four mandatory keys: # resource, walltime, cores and project # resource is 'local.localhost' to execute locally res_dict = {
def test_task_to_dict():
    """
    **Purpose**: Verify that 'to_dict' exports every expected attribute of a
    Task, first for an untouched Task and then for a fully populated one
    """

    # --- Untouched Task: every exported field carries its default ----------
    defaults = {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': '',
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {'uid': None, 'name': None},
        'parent_pipeline': {'uid': None, 'name': None}
    }

    assert Task().to_dict() == defaults

    # --- Populated Task ------------------------------------------------------
    task = Task()
    task.uid = 'test.0000'
    task.name = 'new'
    task.pre_exec = ['module load abc']
    task.executable = ['sleep']
    task.arguments = ['10']
    task.cpu_reqs['processes'] = 10
    task.cpu_reqs['threads_per_process'] = 2
    task.gpu_reqs['processes'] = 5
    task.gpu_reqs['threads_per_process'] = 3
    task.lfs_per_process = 1024
    task.upload_input_data = ['test1']
    task.copy_input_data = ['test2']
    task.link_input_data = ['test3']
    task.move_input_data = ['test4']
    task.copy_output_data = ['test5']
    task.move_output_data = ['test6']
    task.download_output_data = ['test7']
    task.stdout = 'out'
    task.stderr = 'err'
    task.exit_code = 1
    task.path = 'a/b/c'
    task.tag = 'task.0010'
    task.parent_stage = {'uid': 's1', 'name': 'stage1'}
    task.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    # 'state', 'state_history' and 'post_exec' were not touched above, so they
    # are inherited from the defaults; every other field is overridden.
    populated = dict(
        defaults,
        uid='test.0000',
        name='new',
        pre_exec=['module load abc'],
        executable='sleep',
        arguments=['10'],
        cpu_reqs={
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        gpu_reqs={
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        lfs_per_process=1024,
        upload_input_data=['test1'],
        copy_input_data=['test2'],
        link_input_data=['test3'],
        move_input_data=['test4'],
        copy_output_data=['test5'],
        move_output_data=['test6'],
        download_output_data=['test7'],
        stdout='out',
        stderr='err',
        exit_code=1,
        path='a/b/c',
        tag='task.0010',
        parent_stage={'uid': 's1', 'name': 'stage1'},
        parent_pipeline={'uid': 'p1', 'name': 'pipeline1'}
    )

    # The executable was assigned as a one-element list; the export is
    # expected to hold the plain string.
    assert task.to_dict() == populated

    # Assigning the executable as a string must yield the very same export.
    task.executable = 'sleep'
    assert task.to_dict() == populated
t1.pre_exec = [ # Modules to be loaded 'module purge', 'module load intel/18.0', 'module load intel-mpi/intel/2018.3', # Untar the input data 'tar -zxf specfem_data.tar.gz', ] t1.executable = ['./bin/xspecfem3D'] t1.cpu_reqs = { 'processes': 4, 'process_type': 'MPI', 'threads_per_process': 1, 'thread_type': 'OpenMP' } t1.copy_input_data = ['/projects/TROMP/entk/scratch/specfem_data.tar.gz'] t1.post_exec = [ # Tar output files 'tar -zcf specfem_final.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES', # Copy to scratch folder 'cp specfem_final.tar.gz /projects/TROMP/entk/scratch/', ] t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_final.tar.gz'] specfem_stage.add_tasks(t1) p.add_stages(specfem_stage) res_dict = { 'resource': 'princeton.tiger_cpu', 'project': 'geo',
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
    correct RP input transfer directives when given a Task

    Two things are checked:

    * objects that are not a Task are rejected with ``TypeError``;
    * for each of the link/copy/move/upload input attributes, both a plain
      path and a ``source > target`` directive are translated into the
      expected ``source``, ``target`` and (except for uploads) ``action``.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    # Every non-Task value has to be rejected. The value is passed through
    # unchanged -- rebinding it inside the loop would test one value only.
    for invalid in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(invalid, placeholder_dict)

    # (Task attribute, expected RP action); uploads carry no 'action' key
    staging_cases = [
        ('link_input_data', rp.LINK),
        ('copy_input_data', rp.COPY),
        ('move_input_data', rp.MOVE),
        ('upload_input_data', None),
    ]

    directives = [
        '/home/vivek/test.dat',                 # keep the file name
        '/home/vivek/test.dat > new_test.dat',  # rename on the target side
    ]

    for attr, action in staging_cases:
        for directive in directives:

            t = Task()
            setattr(t, attr, [directive])
            ip_list = get_input_list_from_task(t, placeholder_dict)

            # Without '>' both parts are the path itself; with '>' the left
            # side is the source and the right side the target.
            parts = [part.strip() for part in getattr(t, attr)[0].split('>')]

            assert ip_list[0]['source'] == parts[0]
            assert ip_list[0]['target'] == os.path.basename(parts[-1])

            if action is None:
                assert 'action' not in ip_list[0]
            else:
                assert ip_list[0]['action'] == action
def test_create_cud_from_task():
    """
    **Purpose**: Verify that 'create_cud_from_task' carries the complete Task
    description over into the RP ComputeUnitDescription it returns
    """

    # Nested lookup handed to create_cud_from_task: pipeline -> stage -> task
    placeholder_dict = {'p1': {'s1': {'t1': '/home/vivek/some_file.txt'}}}

    # Task exercising every field that is compared below
    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    # Embed the task in a stage and a pipeline, then assign the uids
    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s
    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    # The unit name is the comma-separated uid/name of task, stage, pipeline
    name_fields = (t1.uid, t1.name,
                   t1.parent_stage['uid'], t1.parent_stage['name'],
                   t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.name == '%s,%s,%s,%s,%s,%s' % name_fields

    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was
    # using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments

    # cpu_* and gpu_* fields of the description mirror the requirement dicts
    for prefix, requirements in (('cpu', t1.cpu_reqs), ('gpu', t1.gpu_reqs)):
        assert getattr(cud, prefix + '_processes') == requirements['processes']
        assert getattr(cud, prefix + '_threads') == requirements['threads_per_process']
        assert getattr(cud, prefix + '_process_type') == requirements['process_type']
        assert getattr(cud, prefix + '_thread_type') == requirements['thread_type']

    assert cud.post_exec == t1.post_exec

    # Staging directives; uploads and downloads carry no 'action' entry
    expected_inputs = [
        {'source': 'upload_input.dat', 'target': 'upload_input.dat'},
        {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'},
        {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'},
    ]
    for directive in expected_inputs:
        assert directive in cud.input_staging

    expected_outputs = [
        {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'},
        {'source': 'download_output.dat', 'target': 'download_output.dat'},
    ]
    for directive in expected_outputs:
        assert directive in cud.output_staging
# Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object s2 = Stage() s2.name = 'Stage 2' # Create a Task object t2 = Task() t2.executable = '/bin/bash' t2.arguments = [ '-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt' ] # Copy data from the task in the first stage to the current task's location t2.copy_input_data = [ '$Pipline_%s_Stage_%s_Task_%s/output.txt' % (p.name, s1.name, t1.name) ] # Download the output of the current task to the current location t2.download_output_data = ['ccount.txt'] # Add the Task to the Stage s2.add_tasks(t2) # Add Stage to the Pipeline p.add_stages(s2) # Create Application Manager appman = AppManager(hostname=hostname, port=port, username=username, password=password)
def create_workflow(Kconfig, args):
    """
    Build an EnTK Pipeline holding at most one MD + analysis iteration.

    The iteration to run is recovered from disk rather than from
    ``Kconfig.start_iter``: iterations are counted upwards from 0 for as long
    as at least ``num_replicas`` files ``iter<N>_input*.pdb`` exist in
    ``Kconfig.remote_output_directory``, and the last such iteration is used
    (0 if there is none).

    The single stage that is created contains

    * the simulation tasks, with the replicas spread as evenly as possible
      over ``NODESIZE * GPUs_per_NODE`` tasks, and
    * unless ``Kconfig.strategy == 'extend'``, one analysis task. It is added
      to the *same* stage as the simulation tasks.

    :param Kconfig: configuration object; read through attribute access. The
        optional attributes ``systemxml`` and ``integratorxml`` default to
        ``'system-5.xml'`` and ``'integrator-5.xml'``.
    :param args: parsed command line; only ``args.Kconfig`` (path of the
        configuration file) is used.
    :return: the populated Pipeline
    :raises ValueError: if ``Kconfig.num_replicas`` is smaller than 1

    Side effect: ``Kconfig.start_iter`` is set to the next iteration number
    (as a string) when an iteration was scheduled.
    """

    wf = Pipeline()

    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)

    # Tasks refer to the configuration by file name only
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]

    # Optional settings: fall back to the stock file names when not configured
    systemxml = str(getattr(Kconfig, 'systemxml', 'system-5.xml'))
    integratorxml = str(getattr(Kconfig, 'integratorxml', 'integrator-5.xml'))

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # With zero replicas the file-count test below is always true and the
    # search for the current iteration would never terminate.
    if num_replicas < 1:
        raise ValueError('num_replicas must be at least 1, got %s' % num_replicas)

    # Count the consecutive iterations whose input files are already in place
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    # The second condition limits the loop to a single iteration per call
    start_iter = cur_iter
    while cur_iter < int(Kconfig.num_iterations) and cur_iter < start_iter + 1:

        # ----------------------------------------------------------------------
        # sim_stage: one task per parallel slot, each running run_openmm.py on
        # the replica index range [idxstart, idxend)
        sim_stage = Stage()
        num_allocated_rep = 0
        num_used_parallel = 0

        while num_allocated_rep < num_replicas:

            # Spread the remaining replicas evenly over the remaining slots
            remaining_rep = num_replicas - num_allocated_rep
            def_rep_per_thread = int(
                math.ceil(
                    float(remaining_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread, remaining_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 10,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--Kconfig', config_file,
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(num_allocated_rep + use_replicas),
                '--path', combined_path,
                '>', 'md.log'
            ]

            # The MD script is always linked in as run_openmm.py; the XML
            # files are only needed when md_use_xml is enabled
            link_arr = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr += [
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml)
                ]
            link_arr.append('$SHARED/%s > %s' % (config_file, config_file))
            sim_task.link_input_data = link_arr

            # In the very first iteration every replica starts from the same
            # input structure, copied to its per-replica file name
            copy_arr = []
            if cur_iter == 0:
                copy_arr = [
                    '$SHARED/%s > %s/iter0_input%s.pdb' %
                    (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(num_allocated_rep,
                                     num_allocated_rep + use_replicas)
                ]
            sim_task.copy_input_data = copy_arr
            sim_task.copy_output_data = []

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_stage.add_tasks(sim_task)

        # ----------------------------------------------------------------------
        # analysis task(s): skipped for the 'extend' strategy
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                ana_task.pre_exec = ana_settings
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 10,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                sim_stage.add_tasks(ana_task)

        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
t.executable = ['./bin/xspecfem3D'] t.cpu_reqs = { 'processes': 0, 'process_type': 'MPI', 'threads_per_process': 0, 'thread_type': 'OpenMP' } t.gpu_reqs = { 'processes': 384, 'process_type': 'MPI', 'threads_per_process': 1, 'thread_type': 'OpenMP' } t.copy_input_data = [ '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_data_event_%s.tar' % event, '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_validator.py' ] t.post_exec = [ 'python specfem_validator.py OUTPUT_FILES/output_solver.txt' ] specfem_stage.add_tasks(t2) p.add_stages(specfem_stage) res_dict = { 'resource': 'ornl.titan_aprun', 'walltime': 8 * num_events, 'cpus': 385, 'gpus': 385, 'project': 'BIP149',
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid
    values

    Every invalid assignment is wrapped in its own ``pytest.raises`` context.
    A ``pytest.raises`` block is left as soon as the first exception is
    raised, so placing several assignments in one block would leave every
    assignment after the first unexecuted and therefore unchecked.

    :param s, l, i, b: sample values to try against the Task attributes
        (presumably a string, a list, an integer and a boolean -- confirm
        against the decorator/fixture that supplies them). Each value is only
        assigned to attributes whose expected type it does not match.
    """

    # 'unicode' exists only on Python 2. Resolve it once so that the type
    # filter further down cannot raise NameError on Python 3.
    try:
        text_types = (str, unicode)  # noqa: F821
    except NameError:
        text_types = (str,)

    def reqs(key, value):
        """Return an otherwise valid cpu/gpu requirement dict with one entry replaced."""
        description = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        }
        description[key] = value
        return description

    # Attributes that only accept strings
    str_attrs = ('name', 'path', 'parent_stage', 'parent_pipeline',
                 'stdout', 'stderr')

    # Attributes that only accept lists ('executable' included in this variant)
    list_attrs = ('pre_exec', 'executable', 'arguments', 'post_exec',
                  'upload_input_data', 'copy_input_data', 'link_input_data',
                  'move_input_data', 'copy_output_data',
                  'download_output_data', 'move_output_data')

    t = Task()

    for data in [s, l, i, b]:

        if not isinstance(data, str):
            for attr in str_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        if not isinstance(data, list):
            for attr in list_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        # Process/thread types must be strings (or None): wrong value -> ValueError
        if not isinstance(data, text_types):
            for attr in ('cpu_reqs', 'gpu_reqs'):
                for key in ('process_type', 'thread_type'):
                    with pytest.raises(ValueError):
                        setattr(t, attr, reqs(key, data))

        # Process/thread counts must be integers: wrong type -> TypeError
        if not isinstance(data, int):
            for attr in ('cpu_reqs', 'gpu_reqs'):
                for key in ('processes', 'threads_per_process'):
                    with pytest.raises(TypeError):
                        setattr(t, attr, reqs(key, data))
def _write_push_log(ind, push_times, proc_mem):
    """Write one '<timestamp> <available MB>' line per sample to DATA/push_<ind>.txt."""
    with open(DATA + '/push_%s.txt' % ind, 'w') as f:
        # zip() stops at the shorter list, so a sample whose memory reading
        # was never recorded is dropped instead of raising IndexError
        f.writelines('%s %s\n' % sample for sample in zip(push_times, proc_mem))


def push_function(ind, num_push, num_queues):
    """
    Repeatedly publish one serialized Task to a RabbitMQ queue and record,
    for every message, the publish time and the available system memory.

    The task description is built once and the same dictionary is published
    (with a fresh correlation id each time) to the queue
    ``queue_<ind % num_queues>`` until ``MAX_TASKS / num_push`` messages were
    sent or the ``kill_pusher`` event is set. The samples are written to
    ``DATA/push_<ind>.txt`` on normal completion as well as on
    KeyboardInterrupt or any other exception.

    :param ind: index of this pusher; selects the queue and names the log file
    :param num_push: number of pushers sharing the MAX_TASKS messages
    :param num_queues: number of queues the pushers are distributed over
    """

    # Created before the 'try' so that the handlers below can always dump
    # whatever was collected, even when the failure happens during set-up
    # (e.g. the broker cannot be reached).
    push_times = []
    proc_mem = []

    try:

        mq_connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='localhost', port=32769))
        mq_channel = mq_connection.channel()

        tasks_pushed = 0
        proc_tasks = MAX_TASKS / num_push

        t = Task()
        t.arguments = ["--template=PLCpep7_template.mdp",
                       "--newname=PLCpep7_run.mdp",
                       "--wldelta=100",
                       "--equilibrated=False",
                       "--lambda_state=0",
                       "--seed=1"]
        t.cores = 20
        t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
        t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
                                  'PLCpep7.log > PLCpep7_run1_gen0.log',
                                  'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
                                  'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
                                  'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
                                  'PLCpep7.gro > PLCpep7_run1_gen0.gro'
                                  ]

        t_dict = t.to_dict()

        # Single-argument print calls give the same output on Python 2 and 3
        print('Size of task:  %s' % asizeof.asizeof(t_dict))

        name = 'queue_%s' % (ind % num_queues)

        while (tasks_pushed < proc_tasks) and (not kill_pusher.is_set()):

            corr_id = str(uuid.uuid4())
            obj = {'task': t_dict, 'id': corr_id}

            mq_channel.basic_publish(
                exchange='',
                routing_key=name,
                properties=pika.BasicProperties(correlation_id=corr_id),
                body=json.dumps(obj)
            )

            tasks_pushed += 1
            push_times.append(time.time())
            proc_mem.append(psutil.virtual_memory().available / (2**20))  # MBytes

        print('Push:  %s' % tasks_pushed)
        _write_push_log(ind, push_times, proc_mem)
        print('Push proc killed')

    except KeyboardInterrupt:

        print(len(push_times))
        _write_push_log(ind, push_times, proc_mem)
        print('Push proc killed')

    except Exception as ex:

        print('Unexpected error: %s' % ex)
        print(traceback.format_exc())
        _write_push_log(ind, push_times, proc_mem)
def create_workflow(Kconfig, args):
    """
    Build the EnTK Pipeline that alternates MD simulation and analysis stages.

    Starting at ``Kconfig.start_iter`` and up to ``Kconfig.num_iterations``,
    each iteration adds

    * a simulation stage whose tasks run ``run_openmm.py`` on consecutive
      replica index ranges; the replicas are split into chunks of
      ``ceil(num_replicas / NODESIZE)``, and
    * unless ``Kconfig.strategy == 'extend'``, an analysis stage with a single
      task running ``Kconfig.script_ana``.

    When starting at iteration 0, two pre-processing stages are added first:
    one that moves an existing output directory aside (a timestamp is appended
    to its name) and one that copies the configuration and scripts into the
    output directory. A non-zero start iteration assumes the files of the
    previous run are already in the output directory.

    :param Kconfig: configuration object; read through attribute access.
        ``md_env`` must be one of 'vpy4', 'vpy8', 'vpy9' and ``ana_env`` one
        of 'vpy8', 'vpy9' whenever the respective tasks are created.
    :param args: parsed command line; only ``args.Kconfig`` (name of the
        configuration file) is used.
    :return: the populated Pipeline
    :raises ValueError: if a simulation (analysis) task has to be created but
        ``md_env`` (``ana_env``) names no known environment

    Side effect: ``Kconfig.start_iter`` is advanced to the iteration following
    the last one scheduled.
    """

    wf = Pipeline()

    cur_iter = int(Kconfig.start_iter)

    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)

    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)

    # Shell set-up executed before a task, one list per software environment
    vpy4_settings = [
        'module swap PrgEnv-cray PrgEnv-gnu',
        'module load bwpy/1.2.4',
        'module add bwpy-mpi',
        'module add fftw/3.3.4.10',
        'module add cray-netcdf',
        'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
        'module add cmake/3.1.3',
        'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy8_settings = [
        'module unload PrgEnv-cray',
        'module load PrgEnv-gnu',
        'module unload gcc',
        'module load gcc/5.3.0',
        'module unload bwpy',
        'module load bwpy/2.0.0-pre0',
        'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy8/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy9_settings = [
        'module unload PrgEnv-cray',
        'module load PrgEnv-gnu',
        'module unload gcc',
        'module load gcc/5.3.0',
        'module unload bwpy',
        'module load bwpy/2.0.0-pre1',
        'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy9/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]

    # Environments accepted for MD and for analysis tasks. The lookup is
    # deferred (None for unknown names) so that a configuration which never
    # creates the respective task is not rejected.
    md_envs = {
        'vpy4': vpy4_settings,
        'vpy8': vpy8_settings,
        'vpy9': vpy9_settings
    }
    ana_envs = {'vpy8': vpy8_settings, 'vpy9': vpy9_settings}
    md_settings = md_envs.get(Kconfig.md_env)
    ana_settings = ana_envs.get(Kconfig.ana_env)

    if cur_iter == 0:
        # Move a pre-existing output directory aside under a timestamped name
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

        # Populate the output directory through input staging; the 'ls -l'
        # executable only gives the staging task something to run
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (name, combined_path, name)
            for name in (args.Kconfig, script_ana, Kconfig.md_run_file,
                         Kconfig.md_reference, 'analyze3.py')
        ]
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    while cur_iter < int(Kconfig.num_iterations):

        # ----------------------------------------------------------------------
        # sim_stage: one task per chunk of replicas, each running
        # run_openmm.py on the replica index range [idxstart, idxend)
        if md_settings is None:
            raise ValueError('Unsupported md_env %r, expected one of %s' %
                             (Kconfig.md_env, sorted(md_envs)))

        sim_stage = Stage()

        # True division is required here: with Python 2 integer division the
        # quotient is floored before ceil() sees it, which yields too small a
        # chunk (0 when num_replicas < num_parallel, i.e. an endless loop).
        def_rep_per_thread = int(np.ceil(float(num_replicas) / num_parallel))

        num_allocated_rep = 0
        num_used_threads = 0

        while num_allocated_rep < num_replicas:

            if num_used_threads >= num_parallel:
                print("ALLERT tried use more gpus than allocated")

            # The last chunk may be smaller than the default chunk size
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            replica_range = range(num_allocated_rep,
                                  num_allocated_rep + use_replicas)

            sim_task = Task()
            sim_task.executable = ['bwpy-environ']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'python', 'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--Kconfig', str(args.Kconfig),
                '--idxstart', str(num_allocated_rep),
                '--idxend', str((num_allocated_rep + use_replicas)),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'True',
                '>', 'md.log'
            ]

            # The MD script is always linked in as run_openmm.py; the XML
            # files are only needed when md_use_xml is enabled
            link_arr = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr += [
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml'
                ]
            link_arr.append('$SHARED/%s > %s' % (args.Kconfig, args.Kconfig))
            sim_task.link_input_data = link_arr

            # In the very first iteration every replica starts from the same
            # input structure, copied to its per-replica file name
            copy_arr = []
            if cur_iter == 0:
                copy_arr = [
                    '$SHARED/%s > %s/iter0_input%s.pdb' %
                    (Kconfig.md_input_file, combined_path, idx)
                    for idx in replica_range
                ]
            sim_task.copy_input_data = copy_arr

            # 'extend' feeds each replica's output straight back in as the
            # input of the next iteration
            copy_out = []
            if str(Kconfig.strategy) == 'extend':
                copy_out += [
                    '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                    (combined_path, cur_iter, idx, combined_path,
                     (cur_iter + 1), idx)
                    for idx in replica_range
                ]
            # The task's single md.log is stored once per replica it handled
            copy_out += [
                'md.log > %s/md_logs/iter%s_md%s.log' %
                (combined_path, cur_iter, idx)
                for idx in replica_range
            ]
            sim_task.copy_output_data = copy_out

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_threads = num_used_threads + 1
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)

        # ----------------------------------------------------------------------
        # ana_stage: skipped for the 'extend' strategy
        if str(Kconfig.strategy) != 'extend':
            if ana_settings is None:
                raise ValueError('Unsupported ana_env %r, expected one of %s' %
                                 (Kconfig.ana_env, sorted(ana_envs)))

            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['bwpy-environ']
            ana_task.arguments = [
                'python', script_ana,
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
            ]
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
# Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object to hold checksum tasks s2 = Stage() #HARD-CODED EXCHANGE FOLLOWED BY MD # Create a Task object t2 = Task() t2.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'] #MD Engine # exchange happens here for n0 in range(4): t2.copy_input_data = ['$Pipline_%s_Stage_%s_Task_%s/out.gro > in.gro'%(p.uid, s1.uid, s1_task_uids[n0]), '$Pipline_%s_Stage_%s_Task_%s/in.top'%(p.uid, s1.uid, s1_task_uids[n0]), '$Pipline_%s_Stage_%s_Task_%s/FNF.itp'%(p.uid, s1.uid, s1_task_uids[n0]), '$Pipline_%s_Stage_%s_Task_%s/martini_v2.2.itp'%(p.uid, s1.uid, s1_task_uids[n0]), '$Pipline_%s_Stage_%s_Task_%s/in.mdp'%(p.uid, s1.uid, s1_task_uids[n0])] print t2.copy_input_data t2.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t2.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t2.cores = 5 s2.add_tasks(t2) s2_task_uids.append(t2.uid) # Add Stage to the Pipeline p.add_stages(s2) # Create a dictionary describe four mandatory keys: # resource, walltime, cores and project # resource is 'local.localhost' to execute locally res_dict = {
def create_workflow(Kconfig):
    """Build the iterative simulation / LSDMap analysis pipeline.

    One pre-processing stage is added first, followed by four stages
    (simulation, pre-analysis, lsdmap, post-analysis) for every iteration
    from ``Kconfig.start_iter`` up to, but excluding,
    ``Kconfig.num_iterations``.

    Args:
        Kconfig: configuration object; the attributes read here are
            ``start_iter``, ``num_iterations``, ``remote_output_directory``,
            ``num_parallel_MD_sim``, ``num_replicas``, ``md_input_file``,
            ``md_run_file``, ``md_steps``, ``use_gpus``,
            ``num_CUs_per_MD_replica``, ``lsdm_config_file``, ``nsave``,
            ``restarts``, ``max_alive_neighbors`` and ``max_dead_neighbors``.

    Returns:
        The populated ``Pipeline``.

    Side effects:
        ``Kconfig.start_iter`` is overwritten (as a string) with the
        iteration counter at the end of every loop pass.
    """
    wf = Pipeline()

    def sandbox_of(stage, task):
        # Staging placeholder naming the sandbox of `task` inside `stage`.
        return '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, stage.uid, task.uid)

    cur_iter = int(Kconfig.start_iter)
    # For a non-zero start iteration the earlier results are expected to be
    # present under combined_path already.
    combined_path = str(Kconfig.remote_output_directory)
    # The first iteration of this run takes its start structures from the
    # pre-processing task instead of a post-analysis task.
    restart_iter = cur_iter

    # ---------------------------------------------------------------- pre_proc
    # Split the input structure into per-simulation start files.
    pre_proc_stage = Stage()
    pre_proc_task = Task()
    pre_proc_task.pre_exec = [
        'module load bwpy',
        'export tasks=pre_proc',
        'export iter=-1',
        'export OMP_NUM_THREADS=1',
    ]
    pre_proc_task.executable = ['python']

    splitter_args = [
        'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro', 'input.gro'
    ]
    if cur_iter == 0:
        # Fresh start: clone the user-supplied structure and keep a copy of
        # it in the iteration-0 directory.
        splitter_args += ['--clone', str(Kconfig.num_replicas)]
        structure_staging = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' %
            os.path.basename(Kconfig.md_input_file),
        ]
    else:
        # Restart: continue from the output of the previous iteration.
        structure_staging = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
        ]
    pre_proc_task.arguments = splitter_args
    pre_proc_task.copy_input_data = structure_staging + [
        '$SHARED/spliter.py > spliter.py',
        '$SHARED/gro.py > gro.py',
    ]
    pre_proc_task_ref = sandbox_of(pre_proc_stage, pre_proc_task)
    pre_proc_stage.add_tasks(pre_proc_task)
    wf.add_stages(pre_proc_stage)

    # Environment set-up shared by several tasks; each task appends its own
    # 'export ...' lines to a fresh copy.
    swap_gnu_env = [
        'module swap PrgEnv-cray PrgEnv-gnu',
        'module add bwpy',
        'module add bwpy-mpi',
        'module add fftw',
        'module add cray-netcdf',
        'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
        'module add cmake',
        'module unload darshan, xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
    ]
    analysis_env_tail = [
        'module add bwpy-mpi',
        'module add fftw',
        'module add cray-netcdf',
        'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
        'module add cmake',
        'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
    ]

    while cur_iter < int(Kconfig.num_iterations):
        n_sims = min(int(Kconfig.num_parallel_MD_sim),
                     int(Kconfig.num_replicas))
        iter_dir = '%s/iter_%s' % (combined_path, cur_iter)

        # ------------------------------------------------------------ sim_stage
        # One run_openmm.py task per start file.  Start files come from the
        # pre-processing task in the first pass and from the previous
        # post-analysis task afterwards.
        sim_stage = Stage()
        sim_task_ref = []
        for sim_num in range(n_sims):
            sim_task = Task()
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter,
                ]
                # On Blue Waters, tasks on one node are executed concurrently.
                sim_task.cores = int(Kconfig.num_CUs_per_MD_replica)
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = swap_gnu_env + [
                    'export tasks=md',
                    'export iter=%s' % cur_iter,
                    'export OMP_NUM_THREADS=1',
                ]
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None,
                }
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None,
                }
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'False', '>', 'md.log',
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            if restart_iter == cur_iter:
                start_source = pre_proc_task_ref
            else:
                start_source = post_ana_task_ref
            sim_task.link_input_data.append(
                '%s/temp/start%s.gro > start.gro' % (start_source, sim_num))
            sim_task_ref.append(sandbox_of(sim_stage, sim_task))
            sim_stage.add_tasks(sim_task)
        wf.add_stages(sim_stage)

        # ------------------------------------------------------------- pre_ana
        # Gather the out.gro of every simulation task in one task, which
        # publishes tmp.gro and tmpha.gro to the iteration directory.
        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = swap_gnu_env + [
            'export tasks=pre_ana',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1',
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']
        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]
        for sim_num in range(n_sims):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]
        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/tmpha.gro' % iter_dir,
            'tmp.gro > %s/tmp.gro' % iter_dir,
        ]
        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)

        # -------------------------------------------------------------- lsdmap
        # Run lsdmap on the gathered coordinates (tmpha.gro).
        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu',
            'module unload bwpy',
            'module load bwpy/0.3.0',
        ] + analysis_env_tail + [
            'export tasks=lsdmap',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1',
        ]
        ana_task.executable = ['lsdmap']
        ana_task.arguments = [
            '-f', os.path.basename(Kconfig.lsdm_config_file),
            '-c', 'tmpha.gro', '-n', 'out.nn', '-w', 'weight.w',
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/tmpha.gro > tmpha.gro' % iter_dir,
        ]
        lsdmap_outputs = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
        ]
        for produced in ('tmpha.ev', 'tmpha.eps', 'tmpha.eg', 'out.nn',
                         'lsdmap.log'):
            lsdmap_outputs.append('%s > %s/%s' % (produced, iter_dir, produced))
        ana_task.copy_output_data = lsdmap_outputs
        if cur_iter > 0:
            # Weights written by the previous iteration's post-analysis.
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1)
            ]
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]
        ana_task_ref = sandbox_of(ana_stage, ana_task)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)

        # ------------------------------------------------------------ post_ana
        # Run post_analyze.py on the lsdmap results; the next iteration's
        # simulation tasks link their start files from this task's sandbox.
        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task._name = 'post_ana_task'
        # NOTE(review): the task is only configured for the 'clustering'
        # restart mode; the extent of this branch was reconstructed from a
        # whitespace-mangled source -- confirm against the upstream file.
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module add bwpy/0.3.0',
            ] + analysis_env_tail + [
                'export tasks=post_ana',
                'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1',
            ]
            post_ana_task.executable = ['python']
            post_ana_task.arguments = [
                'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev',
                'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
                'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
                'weight_out.w', 'tmpha.eg',
            ]

            shared_scripts = [
                'post_analyze.py', 'selection.py', 'selection-cluster.py',
                'reweighting.py', 'spliter.py', 'gro.py',
            ]
            linked = ['$SHARED/%s > %s' % (script, script)
                      for script in shared_scripts]
            linked.append('%s/iter_%s/weight_out.w > weight.w' %
                          (combined_path, cur_iter - 1))
            linked.extend('%s/%s > %s' % (iter_dir, item, item)
                          for item in ('tmp.gro', 'tmpha.ev', 'tmpha.eg',
                                       'out.nn'))
            post_ana_task.link_input_data = linked

            if (cur_iter % Kconfig.nsave == 0):
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                    % (cur_iter),
                    'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                    (combined_path, cur_iter, cur_iter),
                ]

            plots = [
                'plot-scatter-cluster-10d.png',
                'plot-scatter-cluster-10d-counts.png',
                'plot-scatter-cluster-10d-ncopiess.png',
            ]
            published = ['%s > %s/%s' % (item, iter_dir, item)
                         for item in ['ncopies.nc', 'weight_out.w',
                                      'out.gro'] + plots]
            published += ['%s > ./results/iter_%s_%s' % (item, cur_iter, item)
                          for item in plots]
            post_ana_task.copy_output_data = published

        post_ana_task_ref = sandbox_of(post_ana_stage, post_ana_task)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def get_pipeline(instance, iterations):
    """Build one pipeline of the CB7G3 workflow.

    The pipeline starts with a single set-up stage (``analysis_1.py``) that
    writes ``CB7G3_run.mdp``.  Each generation ``it`` in
    ``1..iterations`` then appends three stages: ``gmx grompp``,
    ``gmx mdrun`` and an analysis stage (``analysis_2.py``).

    Args:
        instance: run identifier; it is embedded in the names of staged and
            downloaded files (``..._run<instance>_gen<it>...``).
        iterations: number of generations to append.  With ``0`` only the
            set-up stage is created.

    Returns:
        The populated ``Pipeline``.
    """
    p = Pipeline()

    # Stage 1: create the first run parameter file from the template.
    s1 = Stage()
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py', '--template', 'CB7G3_template.mdp', '--newname',
        'CB7G3_run.mdp', '--wldelta', '2', '--equilibrated', 'False',
        '--lambda_state', '0', '--seed',
        '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp', '$SHARED/analysis_1.py'
    ]
    s1.add_tasks(t1)
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Stage 2: gmx grompp -> CB7G3.tpr
        s2 = Stage()
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp', '-c', 'CB7G3.gro', '-p', 'CB7G3.top', '-n',
            'CB7G3.ndx', '-o', 'CB7G3.tpr', '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx', '$SHARED/CB7G3.top', '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp', '$SHARED/cucurbit_7_uril_GMX.itp'
        ]
        # The generation counter starts at 1, so the first generation must be
        # detected with `it == 1`.  (The previous `it == 0` test never matched
        # and the first pass read s3/t3/s4/t4 before they were assigned.)
        if it == 1:
            # First generation: parameters from the set-up task, coordinates
            # from the shared staging area.
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s1.uid, t1.uid), '$SHARED/CB7G3.gro'
            ]
        else:
            # Later generations: s3/t3 and s4/t4 still refer to the previous
            # generation's mdrun and analysis tasks at this point.
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                (p.uid, s3.uid, t3.uid)
            ]
        s2.add_tasks(t2)
        p.add_stages(s2)

        # Stage 3: gmx mdrun on the .tpr produced by stage 2.
        s3 = Stage()
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt',
            20,
            '-deffnm',
            'CB7G3',
            '-dhdl',
            'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' %
            (p.uid, s2.uid, t2.uid)
        ]
        # In the templates below {0} is the generation and {1} the instance.
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]
        s3.add_tasks(t3)
        p.add_stages(s3)

        # Stage 4: analysis that writes the next CB7G3_run.mdp.
        s4 = Stage()
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        # The script has to be the first argument: without it the interpreter
        # receives '--newname=...' as an (unknown) interpreter option.
        # analysis_2.py is the script linked into the sandbox below.
        t4.arguments = [
            'analysis_2.py',
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            # '--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it),
            '--run={0}'.format(instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(
                it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(
                it, instance)
        ]
        s4.add_tasks(t4)
        p.add_stages(s4)

    return p
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):
    """Create a ``Task`` from a task configuration and an experiment configuration.

    Args:
        tcfg: task configuration mapping.  Keys read here: ``pre_exec``,
            ``executable``, ``script``, ``options``, ``cpu``,
            ``upload_input_data`` and, optionally, ``copy_input_data``
            (``{stage: {task: [files]}}``) and ``download_output_data``.
        ecfg: experiment configuration mapping.  Keys read here:
            ``exp_dir``, ``options`` and, when ``tcfg['options']`` contains
            ``"input_data_file"``, ``input_data_file``.
        pipe_name: pipeline name; used in staging references and as the
            value of the ``PIPELINE_ID`` magic variable.
        stage_name: not used by this function; kept for interface
            compatibility.
        task_name: name given to the created task.

    Returns:
        The configured ``Task``.  ``tcfg`` is left unmodified.
    """
    t = Task()

    # Magic variables substituted into file names by mvar_replace_dict().
    mvar_dict = {"PIPELINE_ID": pipe_name}

    t.name = task_name

    # pre_exec loads modules / sets up the environment before the workload.
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    t.executable = tcfg['executable']

    # Upload list.  When the task takes a user-defined input file, add it on
    # a *new* list: appending to tcfg['upload_input_data'] in place would
    # change the caller's configuration and pile up duplicate entries when
    # the same tcfg is used for several tasks.
    upload_list = tcfg['upload_input_data']
    if "input_data_file" in tcfg['options']:
        upload_list = upload_list + [
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file'])
        ]

    # Script followed by the options resolved by match_options().
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task.
    # NOTE(review): other task builders in this file assign `cpu_reqs` with
    # underscore keys ('process_type', ...); confirm that `cpu_threads` with
    # hyphenated keys is what the Task class expects.
    cpu_cfg = tcfg['cpu']
    t.cpu_threads = {
        'processes': cpu_cfg['processes'],
        'process-type': cpu_cfg['process-type'],
        'threads-per-process': cpu_cfg['threads-per-process'],
        'thread-type': cpu_cfg['thread-type'],
    }

    # Files uploaded from the local machine to the task's location.
    t.upload_input_data = upload_list

    # Files copied from the sandboxes of other stages/tasks.
    copy_list = []
    if "copy_input_data" in tcfg:
        for copy_stage, stage_tasks in tcfg['copy_input_data'].items():
            for copy_task, file_names in stage_tasks.items():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend(
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in file_names)
    t.copy_input_data = copy_list

    # Files downloaded into <exp_dir>/output under their own names.
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg:
        download_list.extend(
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data'])
    t.download_output_data = download_list

    return t
def create_workflow(Kconfig, args):
    """Build the pipeline for one iteration of the simulation / analysis loop.

    The iteration to run is derived from the files found under the output
    directory: the highest ``N`` for which at least ``num_replicas`` files
    ``iter<N>_input*.pdb`` exist.  For iteration 0 a staging stage is added
    first.  Then one simulation stage is added and, unless
    ``Kconfig.strategy`` is ``'extend'``, one analysis stage.

    Args:
        Kconfig: configuration object; the attributes read here are
            ``start_iter``, ``num_iterations``, ``remote_output_directory``,
            ``NODESIZE``, ``GPUs_per_NODE``, ``num_replicas``,
            ``script_ana``, ``md_env``, ``ana_env``, ``env_ana_same``,
            ``md_run_file``, ``md_reference``, ``md_input_file``,
            ``md_use_xml``, ``md_steps``, ``trajstride``, ``save_alltraj``
            and ``strategy``.
        args: object whose ``Kconfig`` attribute is the name of the
            configuration file staged to the tasks.

    Returns:
        The populated ``Pipeline``.

    Side effects:
        Prints progress information and overwrites ``Kconfig.start_iter``
        (as a string) after the iteration has been added.
    """
    wf = Pipeline()

    def sandbox_of(stage, task):
        # Staging placeholder naming the sandbox of `task` inside `stage`.
        return '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, stage.uid, task.uid)

    # Superseded below by the value derived from the files on disk.
    cur_iter = int(Kconfig.start_iter)

    on_giotto = str(socket.gethostname()) == 'giotto.rice.edu'
    combined_path = (str(Kconfig.remote_output_directory) + '-giotto'
                     if on_giotto else str(Kconfig.remote_output_directory))

    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    md_settings = Kconfig.md_env
    ana_settings = (md_settings if Kconfig.env_ana_same == 'True'
                    else Kconfig.ana_env)
    print("set", num_parallel, md_settings)

    # Count the leading iterations that already have a full set of inputs.
    iter_found = 0
    while True:
        present = glob.glob('%s/iter%s_input*.pdb' %
                            (combined_path, iter_found))
        if len(present) < num_replicas:
            break
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    if cur_iter == 0:
        # Copy the configuration, the analysis script, the MD run script and
        # the reference structure into the output directory.
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1',
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (item, combined_path, item)
            for item in (args.Kconfig, script_ana, Kconfig.md_run_file,
                         Kconfig.md_reference)
        ]
        pre_proc_task_ref2 = sandbox_of(pre_proc_stage2, pre_proc_task2)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    start_iter = cur_iter
    # The second condition limits the loop to a single pass per call.
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # ------------------------------------------------------------ sim_stage
        # Spread the replicas over at most `num_parallel` tasks; every task
        # handles the index range [idxstart, idxend).
        sim_stage = Stage()
        sim_task_ref = []
        num_allocated_rep = 0
        num_used_parallel = 0
        while num_allocated_rep < num_replicas:
            remaining = num_replicas - num_allocated_rep
            free_slots = num_parallel - num_used_parallel
            def_rep_per_thread = int(
                math.ceil(float(remaining) / float(free_slots)))
            use_replicas = min(def_rep_per_thread, remaining)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            idx_end = num_allocated_rep + use_replicas

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter,
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA',
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP',
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--Kconfig', str(args.Kconfig),
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(idx_end),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', str(Kconfig.save_alltraj),
                '>', 'md.log',
            ]

            links = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                links += [
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                ]
            links.append('$SHARED/%s > %s' % (args.Kconfig, args.Kconfig))

            # In iteration 0 every replica starts from the same input file.
            if cur_iter == 0:
                seeds = [
                    '$SHARED/%s > %s/iter0_input%s.pdb' %
                    (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(num_allocated_rep, idx_end)
                ]
            else:
                seeds = []

            sim_task.link_input_data = links
            sim_task.copy_input_data = seeds
            sim_task.copy_output_data = []

            num_allocated_rep = idx_end
            num_used_parallel += 1
            sim_task_ref.append(sandbox_of(sim_stage, sim_task))
            sim_stage.add_tasks(sim_task)
        wf.add_stages(sim_stage)

        # ------------------------------------------------------------ ana_stage
        # Run the analysis script on the output directory (skipped for the
        # 'extend' strategy).
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter,
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana,
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log',
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None,
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig),
            ]
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]
            ana_task_ref = sandbox_of(ana_stage, ana_task)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf