def get_pipeline(tasks):
    """Build a two-stage EnTK pipeline: one grompp task, then `tasks` mdrun tasks.

    Stage 1 runs a single grompp task configured from the module-level
    `app_coll` table; stage 2 fans out `tasks` mdrun tasks, each copying the
    topol.tpr produced by the grompp task.  Relies on module globals
    `app_coll` and `num_cores`.
    """
    pipeline = Pipeline()

    # --- Stage 1: single grompp task ---------------------------------------
    prep_stage = Stage()

    prep_task = Task()
    prep_task.pre_exec = ['module load gromacs/5.0/INTEL-140-MVAPICH2-2.0']
    prep_task.executable = app_coll['grompp']['executable']
    prep_task.arguments = app_coll['grompp']['arguments']
    prep_task.cores = app_coll['grompp']['cores']
    prep_task.link_input_data = [
        '$SHARED/grompp.mdp > grompp.mdp',
        '$SHARED/input.gro > input.gro',
        '$SHARED/topol.top > topol.top'
    ]

    prep_stage.add_tasks(prep_task)
    pipeline.add_stages(prep_stage)

    # --- Stage 2: `tasks` parallel mdrun tasks -----------------------------
    run_stage = Stage()

    for _ in range(tasks):
        run_task = Task()
        run_task.pre_exec = [
            'module load gromacs/5.0/INTEL-140-MVAPICH2-2.0',
            'export OMP_NUM_THREADS=%s' % num_cores
        ]
        run_task.executable = app_coll['mdrun']['executable']
        run_task.arguments = app_coll['mdrun']['arguments']
        run_task.cores = num_cores
        # Each mdrun consumes the tpr file produced by the grompp task above.
        run_task.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/topol.tpr' % (
                pipeline.uid, prep_stage.uid, prep_task.uid)
        ]
        run_stage.add_tasks(run_task)

    pipeline.add_stages(run_stage)

    return pipeline
def test_assignment_exceptions():
    """Verify Task attribute setters raise TypeError for wrongly-typed values."""
    t = Task()

    # Attributes that must only accept lists.
    list_only_attrs = (
        'pre_exec',
        'executable',
        'arguments',
        'post_exec',
        'upload_input_data',
        'copy_input_data',
        'link_input_data',
        'copy_output_data',
        'download_output_data',
    )

    for value in [1, 'a', True, list()]:
        # `name` accepts only strings.
        if not isinstance(value, str):
            with pytest.raises(TypeError):
                t.name = value

        # The list-typed attributes reject anything that is not a list.
        if not isinstance(value, list):
            for attr in list_only_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, value)
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description.
    """

    # Placeholder dict maps pipeline -> stage -> task -> path, as expected by
    # create_cud_from_task for resolving $Pipeline_... references.
    pipeline = 'p1'
    stage = 's1'
    task = 't1'
    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    # Fully-populated Task covering every attribute the CUD should carry over.
    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = 'grompp'
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    # Attach the task to a pipeline/stage so parent_* fields are populated.
    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    cud = create_cud_from_task(t1, placeholders)

    # CUD name encodes uid/name of the task and of its parent stage/pipeline.
    assert cud.name == '%s,%s,%s,%s,%s,%s' % (
        t1.uid, t1.name, t1.parent_stage['uid'], t1.parent_stage['name'],
        t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was
    # using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.post_exec == t1.post_exec

    # CPU/GPU requirements are mapped field-by-field onto the CUD.
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']

    # Staging directives: upload/download have no 'action'; copy/link do.
    assert {
        'source': 'upload_input.dat',
        'target': 'upload_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_input.dat',
        'action': rp.COPY,
        'target': 'copy_input.dat'
    } in cud.input_staging
    assert {
        'source': 'link_input.dat',
        'action': rp.LINK,
        'target': 'link_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_output.dat',
        'action': rp.COPY,
        'target': 'copy_output.dat'
    } in cud.output_staging
    assert {
        'source': 'download_output.dat',
        'target': 'download_output.dat'
    } in cud.output_staging
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
    correct RP input transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))
    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    # Invalid first arguments must raise TypeError.
    # FIX: the original re-bound the loop variable with `t = list()` inside
    # the `raises` block, so every iteration passed the same list and the
    # listed invalid types were never actually exercised.
    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholders)

    # Test link input data (plain path: target is the basename of the source)
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    # Link with explicit rename via 'src > dst'
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.copy_input_data[0].split('>')[1].strip())

    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.move_input_data[0].split('>')[1].strip())

    # Test upload input data (no 'action' key for plain uploads)
    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.upload_input_data[0].split('>')[1].strip())
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
    correct RP input transfer directives when given a Task

    NOTE(review): this function has the same name as an earlier definition in
    this module; pytest will only collect this one.  Consider renaming one of
    the two so both run.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))
    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    # Invalid first arguments must raise TypeError.
    # FIX: the original re-bound the loop variable with `t = list()` inside
    # the `raises` block, so every iteration passed the same list and the
    # listed invalid types were never actually exercised.
    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholder_dict)

    # Test link input data
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['target'] == os.path.basename(
        t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(
        t.copy_input_data[0].split('>')[1].strip())

    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(
        t.move_input_data[0].split('>')[1].strip())

    # Test upload input data (no 'action' key for plain uploads)
    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(
        t.upload_input_data[0].split('>')[1].strip())
def create_workflow(Kconfig, args):
    """Build the ExTASY-style MD + TICA/MSM analysis workflow pipeline.

    Iterates from Kconfig.start_iter up to Kconfig.num_iterations; each
    iteration adds an MD simulation stage (one task per GPU slot) and, unless
    the strategy is 'extend', a TICA/MSM analysis stage.  For a fresh start
    (cur_iter == 0) a pre-processing stage first archives the previous output
    directory and seeds it with the run scripts and config.

    Returns the assembled Pipeline (`wf`).
    """
    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0 # if non-zero, files are assumed to already be in combined_path

    # giotto writes into a host-specific output directory
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'

    num_parallel = int(Kconfig.NODESIZE)      # replicas run concurrently per iteration
    num_replicas = int(Kconfig.num_replicas)

    #if cur_iter==0:
    #    restart_iter=0
    #else:
    #    restart_iter=cur_iter

    if cur_iter == 0:
        # Fresh start: archive any previous combined_path (mv to timestamped
        # name) and stage the config + run scripts into it.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path,
            combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                    Kconfig.md_run_file)
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    # ------------------------------------------------------------------------------------------------------------------
    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use
        #               the outputs of the analysis stage in the previous
        #               iteration.  Run gromacs on each of the smaller files.
        #               Parameter files and executables are input from
        #               pre_loop.  There are 'numCUs' number of instances of
        #               gromacs per iteration.
        #     Arguments :
        #               grompp    = gromacs parameters filename
        #               topol     = topology filename
        sim_stage = Stage()
        sim_task_ref = list()
        # Replicas are packed onto parallel slots; each task simulates a
        # contiguous index range [num_allocated_rep, num_allocated_rep+use_replicas).
        def_rep_per_thread = int(num_replicas / num_parallel) + 1
        num_allocated_rep = 0
        num_used_threads = 0
        # NOTE(review): num_used_threads is never incremented below, so this
        # over-allocation warning can never fire — verify intent.
        while (num_allocated_rep < num_replicas):
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)

            sim_task = Task()
            sim_task.executable = ['python']
            pre_exec_arr = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module load bwpy',
                'module add bwpy-mpi',
                'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake',
                'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=md',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #    pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            # One GPU per MD task, no CPU processes requested.
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', '10',
                '--idxstart', str(num_allocated_rep),
                '--idxend', str((num_allocated_rep + use_replicas)),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'True',
                '>', 'md.log'
            ]
            link_arr = [
                '$SHARED/%s > run_openmm.py' % (
                    os.path.basename(Kconfig.md_run_file))
            ]
            copy_arr = []
            if cur_iter == 0:
                # First iteration: seed each replica with the shared input pdb.
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' % (
                            Kconfig.md_input_file, combined_path, idx)
                    ]
            #if cur_iter==0 and num_allocated_rep==0:
            #    copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            if str(Kconfig.strategy) == 'extend':
                # 'extend' strategy: this iteration's outputs become the next
                # iteration's inputs directly (no analysis stage).
                copy_out = []
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (
                            combined_path, cur_iter, idx, combined_path,
                            (cur_iter + 1), idx)
                    ]
                sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation
        #                stage is a small coordinate file.  Concatenate such
        #                files from each of the gromacs instances to form a
        #                larger file.
        #     Arguments:
        #                numCUs = number of simulation instances / number of
        #                small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module load bwpy/0.3.0',
                'module add bwpy-mpi',
                'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake',
                'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=tica_msm_ana',
                'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py',
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path,
                                                         cur_iter)
            ]
            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                             'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose:  Perform LSDMap on the large coordinate file to
        #               generate weights and eigen values.
        #     Arguments:
        #               config = name of the config file to be used during LSDMap
        #if(cur_iter % Kconfig.nsave == 0):
        #    post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                          'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                          'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                          'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                          '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                          ]
        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                                  'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                                  'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]
        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)
        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # Advance the iteration counter and persist it on the config so a
        # restart resumes from the right place.
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    # A freshly-constructed Task must serialize to the documented defaults.
    t = Task()
    d = t.to_dict()
    assert d == {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {
            'uid': None,
            'name': None
        },
        'parent_pipeline': {
            'uid': None,
            'name': None
        }
    }

    # A fully-populated Task must serialize every assigned attribute verbatim.
    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()
    assert d == {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {
            'uid': 's1',
            'name': 'stage1'
        },
        'parent_pipeline': {
            'uid': 'p1',
            'name': 'pipeline1'
        }
    }
def get_pipeline(instance, iterations):
    """Build the alchemical free-energy pipeline for one run instance.

    Stage 1 generates the initial mdp file; then for each iteration the
    pipeline runs grompp (stage 2), mdrun (stage 3) and the alchemical
    analysis (stage 4).  `instance` tags output filenames; `iterations` is
    the number of grompp/mdrun/analysis rounds.  Relies on the module-level
    constant SEED.
    """
    # Create a Pipeline object
    p = Pipeline()

    # Stage 1: generate the initial CB7G3_run.mdp from the template
    s1 = Stage()

    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py',
        '--template', 'CB7G3_template.mdp',
        '--newname', 'CB7G3_run.mdp',
        '--wldelta', '2',
        '--equilibrated', 'False',
        '--lambda_state', '0',
        '--seed', '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp',
        '$SHARED/analysis_1.py'
    ]

    s1.add_tasks(t1)
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Stage 2: grompp — assemble the tpr from mdp/gro/top/ndx
        s2 = Stage()

        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp',
            '-c', 'CB7G3.gro',
            '-p', 'CB7G3.top',
            '-n', 'CB7G3.ndx',
            '-o', 'CB7G3.tpr',
            '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx',
            '$SHARED/CB7G3.top',
            '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp',
            '$SHARED/cucurbit_7_uril_GMX.itp'
        ]

        # FIX: the loop starts at 1, so the original `if it == 0:` was
        # unreachable and the first iteration hit the `else` branch, which
        # references s4/t4 before they exist (NameError).  The first
        # iteration must consume stage-1 output and the shared gro file.
        if it == 1:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (
                    p.uid, s1.uid, t1.uid),
                '$SHARED/CB7G3.gro'
            ]
        else:
            # Later iterations chain from the previous round's analysis (s4/t4)
            # and mdrun (s3/t3) outputs.
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (
                    p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' % (
                    p.uid, s3.uid, t3.uid)
            ]

        s2.add_tasks(t2)
        p.add_stages(s2)

        # Stage 3: mdrun — run the MD and stage out/down the trajectories
        s3 = Stage()

        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt', 20,
            '-deffnm', 'CB7G3',
            '-dhdl', 'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)
        ]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]

        s3.add_tasks(t3)
        p.add_stages(s3)

        # Stage 4: alchemical analysis — produce the next iteration's mdp
        s4 = Stage()

        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        t4.arguments = [
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            #'--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it, instance),
            '--run={1}'.format(it, instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(
                it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(
                it, instance)
        ]

        s4.add_tasks(t4)
        p.add_stages(s4)

    return p
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.

    Reads the exchange pairs produced by the previous cycle's exchange task,
    builds an MD stage (one AMBER task per replica, inputs linked from the
    previous cycle's directories recorded in the module-level `Book`), then an
    exchange stage, records this cycle's task paths in `Book`, and returns the
    assembled Pipeline.
    """
    # Read exchangePairs.dat written by the previous exchange task; column 1
    # holds the partner replica index for each replica.
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()
    md_dict = dict()

    # Create the MD stage: one task per replica
    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  # MD Engine, Blue Waters
        # FIX: the original indexed `Book[Cycle-1]` — `Cycle` is this function
        # object, so that is a TypeError at runtime.  The previous cycle's
        # bookkeeping entry is `Book[Cycles-1]`.
        # The restart file comes from the exchange partner's directory; the
        # topology and input come from this replica's own previous directory.
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
            '%s/prmtop' % (Book[Cycles - 1][r]),
            #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
            '%s/mdin' % (Book[Cycles - 1][r])
        ]
        # Should be abstracted from user?
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # FIX: the original formatted with `p.uid`, but no `p` exists in this
        # function — the pipeline is `q`.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid,
                                                        md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    # Create the exchange stage with a single exchange task
    ex_stg = Stage()
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # FIX: the original referenced an undefined `d` (a `d = dict()` line
        # appears to have been removed); `md_dict` holds the MD task paths
        # whose mdinfo files the exchange task consumes — confirm against the
        # initial-cycle counterpart of this function.
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    # Record this cycle's MD task paths for the next cycle.
    Book.append(md_dict)
    return q
def generate_pipeline(cfg): cfg_file = cfg['run_cfg_file'] # resource and workload config run_file = cfg['run_file'] # runs for this campaign # setup S1 workload cfg = ru.Config(cfg=ru.read_json(cfg_file)) runs = check_runs(cfg_file, run_file) if not runs: print('S1: nothing to run, exiting.') return # for each run in the campaign: # - create cfg with requested receptor and smiles # - create a number of masters as EnTK tasks and add them to a pipeline # - submit configured number of masters with that cfg # setup EnTK pipeline p = Pipeline() p.name = 'S1.RAPTOR' s = Stage() # create cfg subs = dict() rurl = cfg.fs_url + cfg.workload.results d = rs.filesystem.Directory(rurl) ls = [str(u).split('/')[-1] for u in d.list()] workload = cfg.workload for receptor, smiles, n_workers, runtime in runs: print('%30s %s' % (receptor, smiles)) name = '%s_-_%s' % (receptor, smiles) tgt = '%s.%s.gz' % (name, workload.output) cpw = cfg.cpw gpw = cfg.gpw n_masters = cfg.n_masters cfg.workload.receptor = receptor cfg.workload.smiles = smiles cfg.workload.name = name cfg.runtime = runtime cfg.n_workers = n_workers print('n_workers: %d' % cfg.n_workers) ru.write_json(cfg, 'configs/wf0.%s.cfg' % name) for i in range(n_masters): t = Task() t.pre_exec = [ '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate' ] t.executable = "python3" t.arguments = ['wf0_master.py', i] t.cpu_reqs = { 'processes': 1, 'threads_per_process': 4, 'thread_type': None, 'process_type': None } t.upload_input_data = [ 'wf0_master.py', 'wf0_worker.py', 'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py' ] t.link_input_data = ['%s > input_dir' % workload.input_dir] #t.download_output_data = ['%s.%s.gz > results/%s.%s.gz' % # (name, workload.output, name, workload.output)] s.add_tasks(t) p.add_stages(s) return p
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """
    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        # String-typed attributes reject non-strings.
        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data
            with pytest.raises(TypeError):
                t.path = data
            with pytest.raises(TypeError):
                t.parent_stage = data
            with pytest.raises(TypeError):
                t.parent_pipeline = data
            with pytest.raises(TypeError):
                t.stdout = data
            with pytest.raises(TypeError):
                t.stderr = data

        # List-typed attributes reject non-lists.
        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.move_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
            with pytest.raises(TypeError):
                t.move_output_data = data

        # `executable` accepts either a string or a list.
        if not isinstance(data, str) and not isinstance(data, list):
            with pytest.raises(TypeError):
                t.executable = data

        # FIX: the original guard also checked `isinstance(data, unicode)`,
        # which raises NameError on Python 3 — `str` covers it.  Each
        # assignment also gets its own `raises` block: grouped under one
        # block, only the first assignment was ever executed.
        if not isinstance(data, str):
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        # Numeric fields of cpu_reqs/gpu_reqs reject non-ints.
        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    # Expected serialization of a freshly constructed Task
    default_dict = {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': str(),
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {'uid': None, 'name': None},
        'parent_pipeline': {'uid': None, 'name': None}
    }

    assert Task().to_dict() == default_dict

    # Populate every public attribute and check the serialized form
    task = Task()
    task.uid = 'test.0000'
    task.name = 'new'
    task.pre_exec = ['module load abc']
    task.executable = ['sleep']
    task.arguments = ['10']
    task.cpu_reqs['processes'] = 10
    task.cpu_reqs['threads_per_process'] = 2
    task.gpu_reqs['processes'] = 5
    task.gpu_reqs['threads_per_process'] = 3
    task.lfs_per_process = 1024
    task.upload_input_data = ['test1']
    task.copy_input_data = ['test2']
    task.link_input_data = ['test3']
    task.move_input_data = ['test4']
    task.copy_output_data = ['test5']
    task.move_output_data = ['test6']
    task.download_output_data = ['test7']
    task.stdout = 'out'
    task.stderr = 'err'
    task.exit_code = 1
    task.path = 'a/b/c'
    task.tag = 'task.0010'
    task.parent_stage = {'uid': 's1', 'name': 'stage1'}
    task.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    # The populated serialization differs from the defaults only in the
    # fields assigned above
    expected_dict = dict(default_dict)
    expected_dict.update({
        'uid': 'test.0000',
        'name': 'new',
        'pre_exec': ['module load abc'],
        'executable': 'sleep',
        'arguments': ['10'],
        'cpu_reqs': {
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {'uid': 's1', 'name': 'stage1'},
        'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}
    })

    assert task.to_dict() == expected_dict

    # Assigning the executable as a plain string must serialize identically
    # to the list assignment above
    task.executable = 'sleep'
    assert task.to_dict() == expected_dict
def create_workflow(Kconfig):
    """
    Build the gromacs + LSDMap adaptive-sampling workflow as an EnTK Pipeline.

    Per iteration the pipeline runs four stages: an ensemble of gromacs
    simulations, a pre-analysis concatenation task, an LSDMap analysis task,
    and a post-analysis task that produces the start structures for the next
    iteration.

    :param Kconfig: parsed configuration object; this function reads
        num_CUs, num_iterations, md_input_file, mdp_file, top_file,
        ndx_file, lsdm_config_file, num_runs, max_alive_neighbors,
        max_dead_neighbors and nsave from it.
    :return: the assembled Pipeline.
    """

    # User settings
    ENSEMBLE_SIZE = int(Kconfig.num_CUs)  # Number of ensemble members
    TOTAL_ITERS = int(
        Kconfig.num_iterations)  # Number of iterations to run current trial

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    '''
    pre_proc_stage :
        Purpose : Transfers files, Split the input file into smaller files to be used by each of the gromacs instances
                  in the first iteration.

        Arguments :
            inputfile = file to be split
            numCUs    = number of simulation instances/ number of smaller files
    '''
    pre_proc_stage = Stage()
    pre_proc_task = Task()
    pre_proc_task.pre_exec = ['module load bwpy']
    pre_proc_task.executable = ['python']
    pre_proc_task.arguments = [
        'spliter.py', Kconfig.num_CUs,
        os.path.basename(Kconfig.md_input_file)
    ]
    pre_proc_task.copy_input_data = [
        '$SHARED/%s' % os.path.basename(Kconfig.md_input_file),
        '$SHARED/spliter.py', '$SHARED/gro.py'
    ]
    # Sandbox reference so later stages can link this task's split outputs
    pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
        wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
    pre_proc_stage.add_tasks(pre_proc_task)
    wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------------------------------------------------------

    cur_iter = 0
    while (cur_iter < TOTAL_ITERS):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp = gromacs parameters filename
        #           topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(ENSEMBLE_SIZE):

            sim_task = Task()
            sim_task.pre_exec = [
                'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
                'module load bwpy', 'module load platform-mpi',
                'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
                'export PATH=/u/sciteam/balasubr/.local/bin:$PATH'
            ]
            sim_task.executable = ['python']
            sim_task.cores = 16
            sim_task.arguments = [
                'run.py', '--mdp',
                os.path.basename(Kconfig.mdp_file), '--top',
                os.path.basename(Kconfig.top_file), '--gro', 'start.gro',
                '--out', 'out.gro'
            ]
            sim_task.link_input_data = [
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.mdp_file)),
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.top_file)),
                '$SHARED/run.py > run.py'
            ]

            if Kconfig.ndx_file is not None:
                sim_task.link_input_data.append('$SHARED/{0}'.format(
                    os.path.basename(Kconfig.ndx_file)))
            if (cur_iter == 0):
                # First iteration: start structure comes from the splitter task
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                # Later iterations: start structure comes from the previous
                # iteration's post-analysis task (post_ana_task_ref is set at
                # the end of each loop pass, so it is defined here)
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
            'module load bwpy'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = [
            'pre_analyze.py', Kconfig.num_CUs, 'tmp.gro', '.'
        ]
        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze.py > pre_analyze.py'
        ]
        # Link the output coordinate file of every ensemble member
        for sim_num in range(ENSEMBLE_SIZE):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > $SHARED/iter_%s/tmpha.gro' % cur_iter,
            'tmp.gro > $SHARED/iter_%s/tmp.gro' % cur_iter
        ]

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load bwpy', 'module load platform-mpi',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        ana_task.executable = ['lsdmap']
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '$SHARED/iter_%s/tmpha.gro > tmpha.gro' % cur_iter
        ]
        ana_task.copy_output_data = [
            'tmpha.ev > $SHARED/iter_%s/tmpha.ev' % cur_iter,
            'out.nn > $SHARED/iter_%s/out.nn' % cur_iter
        ]

        if cur_iter > 0:
            # Reuse the weights written by the previous iteration's LSDMap task
            # (ana_task_ref is set at the end of this block each pass)
            ana_task.link_input_data += [
                '%s/weight.w > weight.w' % ana_task_ref
            ]
            ana_task.copy_output_data += [
                'weight.w > $SHARED/iter_%s/weight.w' % cur_iter
            ]

        # Download the LSDMap log every 'nsave'-th iteration
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > output/iter%s/lsdmap.log' % cur_iter
            ]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from
        #                pre_loop to generate the new coordinate file to be used by the simulation_step in the next
        #                iteration.
        #     Arguments:
        #             num_runs            = number of configurations to be generated in the new coordinate file
        #             out                 = output filename
        #             cycle               = iteration number
        #             max_dead_neighbors  = max dead neighbors to be considered
        #             max_alive_neighbors = max alive neighbors to be considered
        #             numCUs              = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task.pre_exec = [
            'module load bwpy',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        post_ana_task.executable = ['python']
        post_ana_task.arguments = [
            'post_analyze.py', Kconfig.num_runs, 'tmpha.ev', 'ncopies.nc',
            'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
            Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
            'input.gro', cur_iter, Kconfig.num_CUs
        ]
        post_ana_task.link_input_data = [
            '$SHARED/post_analyze.py > post_analyze.py',
            '$SHARED/selection.py > selection.py',
            '$SHARED/reweighting.py > reweighting.py',
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
            '$SHARED/iter_%s/tmp.gro > tmp.gro' % cur_iter,
            '$SHARED/iter_%s/tmpha.ev > tmpha.ev' % cur_iter,
            '$SHARED/iter_%s/out.nn > out.nn' % cur_iter,
            '$SHARED/input.gro > input.gro'
        ]

        if cur_iter > 0:
            post_ana_task.link_input_data += [
                '%s/weight.w > weight_new.w' % ana_task_ref
            ]

        if (cur_iter % Kconfig.nsave == 0):
            post_ana_task.download_output_data = [
                'out.gro > output/iter%s/out.gro' % cur_iter,
                'weight.w > output/iter%s/weight.w' % cur_iter
            ]

        # Sandbox reference consumed by the next iteration's sim_stage
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1

    return wf
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description
    """

    placeholder_dict = {'p1': {'s1': {'t1': '/home/vivek/some_file.txt'}}}

    # Fully populated Task to be translated into a CUD
    task = Task()
    task.name = 't1'
    task.pre_exec = ['module load gromacs']
    task.executable = ['grompp']
    task.arguments = ['hello']
    task.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    task.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    task.post_exec = ['echo test']
    task.upload_input_data = ['upload_input.dat']
    task.copy_input_data = ['copy_input.dat']
    task.link_input_data = ['link_input.dat']
    task.copy_output_data = ['copy_output.dat']
    task.download_output_data = ['download_output.dat']

    # Wire the task into a named pipeline/stage so parent info is populated
    pipeline = Pipeline()
    pipeline.name = 'p1'
    stage = Stage()
    stage.name = 's1'
    stage.tasks = task
    pipeline.stages = stage
    pipeline._assign_uid('test')

    cud = create_cud_from_task(task, placeholder_dict)

    expected_name = '%s,%s,%s,%s,%s,%s' % (
        task.uid, task.name, task.parent_stage['uid'],
        task.parent_stage['name'], task.parent_pipeline['uid'],
        task.parent_pipeline['name'])
    assert cud.name == expected_name

    assert cud.pre_exec == task.pre_exec
    # rp returns executable as a string regardless of whether assignment was
    # using string or list
    assert cud.executable == task.executable
    assert cud.arguments == task.arguments
    assert cud.cpu_processes == task.cpu_reqs['processes']
    assert cud.cpu_threads == task.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == task.cpu_reqs['process_type']
    assert cud.cpu_thread_type == task.cpu_reqs['thread_type']
    assert cud.gpu_processes == task.gpu_reqs['processes']
    assert cud.gpu_threads == task.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == task.gpu_reqs['process_type']
    assert cud.gpu_thread_type == task.gpu_reqs['thread_type']
    assert cud.post_exec == task.post_exec

    # Staging directives: upload/download have no action, copy/link do
    assert {
        'source': 'upload_input.dat',
        'target': 'upload_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_input.dat',
        'action': rp.COPY,
        'target': 'copy_input.dat'
    } in cud.input_staging
    assert {
        'source': 'link_input.dat',
        'action': rp.LINK,
        'target': 'link_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_output.dat',
        'action': rp.COPY,
        'target': 'copy_output.dat'
    } in cud.output_staging
    assert {
        'source': 'download_output.dat',
        'target': 'download_output.dat'
    } in cud.output_staging
def create_workflow(Kconfig, args):
    """
    Build the TICA/MSM adaptive-sampling workflow as an EnTK Pipeline.

    Per iteration the pipeline runs an ensemble of OpenMM MD tasks followed
    (unless the 'extend' strategy is selected) by a TICA/MSM analysis task
    that selects the start structures for the next iteration.  On iteration
    zero two preparatory stages archive the previous output directory and
    seed it with the scripts/config files from $SHARED.

    :param Kconfig: parsed configuration object; this function reads
        start_iter, remote_output_directory, NODESIZE, num_replicas,
        script_ana, md_env, ana_env, num_iterations, trajstride, md_steps,
        md_use_xml, md_run_file, md_reference, md_input_file, strategy and
        nsave from it.
    :param args: command-line arguments; only args.Kconfig (the config file
        name) is used.
    :return: the assembled Pipeline.  Side effect: Kconfig.start_iter is
        advanced as iterations are generated.
    """

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #assumed of iteration non zero that files are in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory
                            )  #'/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  #run-tica-msm4.py

    # Environment bootstrap command lists for the available Blue Waters
    # python stacks.  NOTE: the lists differ in small details (e.g.
    # 'enabled' vs 'enable', 'plugin' vs 'plugins') — preserved verbatim.
    vpy4_settings = [
        'module swap PrgEnv-cray PrgEnv-gnu', 'module load bwpy/1.2.4',
        'module add bwpy-mpi', 'module add fftw/3.3.4.10',
        'module add cray-netcdf',
        'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
        'module add cmake/3.1.3', 'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy4_2_settings = [
        'module unload bwpy', 'module load bwpy',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LIBRARY_PATH="${BWPY_LIBRARY_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enabled',
        ' export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugin',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'bwpy-environ', 'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy8_settings = [
        'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
        'module unload gcc', 'module load gcc/5.3.0', 'module unload bwpy',
        'module load bwpy/2.0.0-pre0', 'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy8/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy9_settings = [
        'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
        'module unload gcc', 'module load gcc/5.3.0', 'module unload bwpy',
        'module load bwpy/2.0.0-pre1', 'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy9/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    #'module load craype-ml-plugin-py3/1.1.0'

    # Select environment settings for the MD and analysis tasks.
    # NOTE(review): if Kconfig.md_env/ana_env match none of these values,
    # md_settings/ana_settings are left unbound and their first use below
    # raises NameError — confirm the config always supplies a valid value.
    if Kconfig.md_env == 'vpy4':
        md_settings = vpy4_settings
    if Kconfig.md_env == 'vpy8':
        md_settings = vpy8_settings
    if Kconfig.ana_env == 'vpy8':
        ana_settings = vpy8_settings
    if Kconfig.md_env == 'vpy9':
        md_settings = vpy9_settings
    if Kconfig.ana_env == 'vpy9':
        ana_settings = vpy9_settings

    #if cur_iter==0:
    #  restart_iter=0
    #else:
    #  restart_iter=cur_iter

    if cur_iter == 0:
        # Stage 1: archive the previous combined output directory by renaming
        # it with a timestamp suffix
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

        # Stage 2: seed the (fresh) output directory with scripts and config
        # files from the shared area; the 'ls -l' executable is a no-op — the
        # real work happens via copy_input_data
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference),
            '$SHARED/%s > %s/%s' %
            ('analyze3.py', combined_path, 'analyze3.py')
        ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------------------------------------------------------

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run MD on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' number of instances per iteration.
        #     Arguments :
        #           grompp = gromacs parameters filename
        #           topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        # Replicas handled by each MD task.
        # NOTE(review): on Python 2 'num_replicas / num_parallel' is integer
        # division before np.ceil is applied — confirm intended rounding.
        def_rep_per_thread = int(np.ceil(num_replicas / num_parallel))
        num_allocated_rep = 0
        num_used_threads = 0
        while (num_allocated_rep < num_replicas):

            # NOTE(review): num_used_threads is never incremented in this
            # loop, so this over-allocation warning can never fire — confirm.
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)

            # One GPU-bound MD task covering replicas
            # [num_allocated_rep, num_allocated_rep + use_replicas)
            sim_task = Task()
            sim_task.executable = ['bwpy-environ']  #'python']
            pre_exec_arr = md_settings + [
                'export tasks=md', 'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'python', 'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--Kconfig',
                str(args.Kconfig), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'True', '>', 'md.log'
            ]
            # Link the MD runner (and optional serialized system/integrator)
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                # Seed the iteration-0 inputs from the shared MD input file
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            if str(Kconfig.strategy) == 'extend':
                # 'extend' strategy: this iteration's outputs directly become
                # the next iteration's inputs (no analysis/selection step)
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         (cur_iter + 1), idx)
                    ]

            for idx in range(num_allocated_rep,
                             num_allocated_rep + use_replicas):
                #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                copy_out = copy_out + [
                    'md.log > %s/md_logs/iter%s_md%s.log' %
                    (combined_path, cur_iter, idx)
                ]
            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #  sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each MD instance in the simulation stage is a small coordinate file.
        #                Analyze the combined outputs to select next-iteration start structures.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana', 'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['bwpy-environ']
            ana_task.arguments = [
                'python', script_ana, '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]
            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                             'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                      'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                      'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                      'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                      '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                      ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                   'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                   'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                   'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                   'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                   'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)
        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def create_workflow(Kconfig, args):
    """Build the EnTK Pipeline for one ExTASY MD + analysis iteration.

    Reads run settings from ``Kconfig`` (iteration counter, node/GPU counts,
    file names, environment set-up command lists) and uses ``args.Kconfig``
    (the path of the configuration file itself) for staging directives.

    Side effect: ``Kconfig.start_iter`` is advanced past the iteration that
    was scheduled.

    Returns the assembled ``Pipeline``: an optional staging stage when
    starting from iteration 0, one simulation stage, and — unless
    ``Kconfig.strategy == 'extend'`` — one analysis stage.
    """

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #assumed of iteration non zero that files are in combined_path
    # NOTE(review): one named host gets a suffixed output directory —
    # presumably a site-specific workaround; confirm before reuse elsewhere.
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory
                            )  #'/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  #run-tica-msm4.py

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # Resume support: advance past every iteration whose full set of input
    # PDBs already exists in combined_path.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    if cur_iter == 0:
        # Fresh start: stage config, analysis script, MD runner and reference
        # into the shared output directory.  The 'ls -l' task is only a
        # vehicle for the copy_input_data staging directives.
        #pre_proc_stage = Stage()
        #pre_proc_task = Task()
        #pre_proc_task.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
        #pre_proc_task.executable = ['mv']
        #pre_proc_task.arguments = [ combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M") ]
        #pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        #pre_proc_stage.add_tasks(pre_proc_task)
        #wf.add_stages(pre_proc_stage)
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task', 'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference)
        ]  # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)
    # ------------------------------------------------------------------------------------------------------------------

    start_iter = cur_iter
    # The second condition caps the loop at start_iter + 1, so exactly one
    # iteration is scheduled per call.
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #    Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #              previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #              are input from pre_loop. There arei 'numCUs' number of instances of gromacs per iteration.
        #    Arguments :
        #              grompp = gromacs parameters filename
        #              topol  = topology filename
        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        #num_used_threads=0
        #print(def_rep_per_thread)
        while (num_allocated_rep < num_replicas):
            #if (num_used_threads>=num_parallel):
            #    print("ALLERT tried use more gpus than allocated")
            # Spread the replicas still unassigned evenly over the parallel
            # slots still unused.
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            #if ((num_replicas-num_allocated_rep)>def_rep_per_thread):  # check if use all threads
            #    use_replicas=def_rep_per_thread
            #else:  #use pnly part of threads
            #    use_replicas=(num_replicas-num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']
            pre_exec_arr = md_settings + [
                'export tasks=md', 'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #    pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            # One task simulates the replica index range [idxstart, idxend).
            sim_task.arguments = [
                'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--Kconfig',
                str(args.Kconfig), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj',
                str(Kconfig.save_alltraj), '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                # Seed every replica of iteration 0 from the shared input PDB.
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #    copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                        ]

            #if cur_iter==0 and num_allocated_rep==0:
            #    copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #    for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #        #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #        copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #    Purpose:   The output of each gromacs instance in the simulaxftion stage is a small coordinate file.
        #               Concatenate such files from each of the gromacs instances to form a larger file.
        #    Arguments:
        #               numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana, '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]
            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                             'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #    Purpose:  Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #    Arguments:
        #              config = name of the config file to be used during LSDMap
        #if(cur_iter % Kconfig.nsave == 0):
        #    post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                          'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                          'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                          'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                          '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                         ]
        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                                  'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                                  'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                                  'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]
        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)
        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def one_cycle(p, workflow_cfgs, resource):
    """Append the full ten-stage MDFF workflow for one cycle to pipeline *p*.

    Parameters
    ----------
    p : Pipeline
        EnTK pipeline the stages are appended to (mutated in place).
    workflow_cfgs : dict
        Per-resource configuration; ``workflow_cfgs[resource]`` must provide
        ``'simulation'``/``'analysis'`` sections (each with ``'pre_exec'``
        and ``'cpus'``) and a ``'shared_data'`` file list.
    resource : str
        Key into ``workflow_cfgs``.

    Also reads the module-level names ``summit_hw_thread_cnt``, ``namd_path``
    and ``set_vmd_run`` — assumed to be defined elsewhere in this module
    (TODO confirm).
    """

    # BUG FIX: the body previously referenced the name ``workflow_cfg``,
    # silently ignoring the ``workflow_cfgs`` argument (NameError unless a
    # same-named global happened to exist).  Bind the parameter locally so
    # the caller's configuration is actually used.
    workflow_cfg = workflow_cfgs

    ## Simulation related parameters
    sim_pre_exec = workflow_cfg[resource]['simulation']['pre_exec']
    sim_cpus = workflow_cfg[resource]['simulation']['cpus']

    ## Analysis related parameters
    ana_pre_exec = workflow_cfg[resource]['analysis']['pre_exec']
    ana_cpus = workflow_cfg[resource]['analysis']['cpus']

    # Output files produced by the corresponding tasks; later stages refer
    # to them through $Pipeline_*_Stage_*_Task_* data references.
    task1_output = ['4ake-target_autopsf.situs']
    task2_output = ['4ake-target_autopsf-grid.dx']
    task3_output = ['1ake-docked-noh_autopsf-grid.pdb']
    task4_output = ['1ake-extrabonds.txt']
    task5_output = [
        '1ake-extrabonds-cispeptide.txt', '1ake-extrabonds-chirality.txt'
    ]

    first_stage = Stage()
    # We use names of pipelines, stages, tasks to refer to data of a
    # particular task
    first_stage.name = 'Generating a simulated density map'

    # Create tasks and add them to stage
    task1 = Task()
    task1.name = 'Starting to load the target PDB'
    task1.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task1.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task1_tcl_cmds = ['mol new 4ake-target.pdb']
    task1_tcl_cmds += ['package require autopsf']
    task1_tcl_cmds += ['autopsf 4ake-target.pdb']
    task1_tcl_cmds += ['set sel [atomselect top all]']
    task1_tcl_cmds += ['package require mdff']
    task1_tcl_cmds += ['mdff sim $sel -res 5 -o {}'.format(task1_output[0])]
    task1_tcl_cmds += ['mol new {}'.format(task1_output[0])]

    set_vmd_run(task1, task1_tcl_cmds, "first_stage.tcl")
    #task.copy_input_data = ["first_stage.tcl"]
    task1.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]
    first_stage.add_tasks(task1)

    # Add sim_stage to Pipeline
    p.add_stages(first_stage)

    second_stage = Stage()
    second_stage.name = 'Converting the density map to an MDFF potential'
    task2 = Task()
    task2.name = 'generate dx file'
    task2.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task2.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task2_tcl_cmds = ['package require mdff']
    task2_tcl_cmds += [
        'mdff griddx -i {} -o {}'.format(task1_output[0], task2_output[0])
    ]
    task2.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name, task1_output[0])
    ]
    task2.link_input_data = [
        ("$SHARED/%s" % x) for x in workflow_cfg[resource]['shared_data']
    ]
    set_vmd_run(task2, task2_tcl_cmds, "second_stage.tcl")
    second_stage.add_tasks(task2)
    p.add_stages(second_stage)

    third_stage = Stage()
    third_stage.name = 'Preparing the initial structure'
    task3 = Task()
    task3.name = 'Starting to load the initial structure'
    task3.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task3.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task3_tcl_cmds = ['mol new 1ake-docked-noh.pdb']
    task3_tcl_cmds += ['package require autopsf']
    task3_tcl_cmds += ['autopsf 1ake-docked-noh.pdb']
    task3_tcl_cmds += ['package require mdff']
    task3_tcl_cmds += [
        'mdff gridpdb -psf 1ake-docked-noh_autopsf.psf -pdb 1ake-docked-noh_autopsf.pdb -o {}'
        .format(task3_output[0])
    ]
    task3.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]
    set_vmd_run(task3, task3_tcl_cmds, "third_stage.tcl")
    third_stage.add_tasks(task3)
    p.add_stages(third_stage)

    fourth_stage = Stage()
    fourth_stage.name = 'Defining secondary structure restraints'
    task4 = Task()
    task4.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task4.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task4_tcl_cmds = [
        'package require ssrestraints', 'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile 1ake-docked-noh_autopsf.pdb',
        'ssrestraints -psf 1ake-docked-noh_autopsf.psf -pdb 1ake-docked-noh_autopsf.pdb -o {} -hbonds'
        .format(task4_output[0])
    ]
    task4.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf')
    ]
    set_vmd_run(task4, task4_tcl_cmds, "fourth_stage.tcl")
    fourth_stage.add_tasks(task4)
    p.add_stages(fourth_stage)

    fifth_stage = Stage()
    fifth_stage.name = 'cispeptide and chirality restraints'
    task5 = Task()
    task5.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task5.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task5_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile 1ake-docked-noh_autopsf.pdb',
        'package require cispeptide', 'package require chirality',
        'cispeptide restrain -o {}'.format(task5_output[0]),
        'chirality restrain -o {}'.format(task5_output[1])
    ]
    task5.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf')
    ]
    set_vmd_run(task5, task5_tcl_cmds, 'fifth_stage.tcl')
    fifth_stage.add_tasks(task5)
    p.add_stages(fifth_stage)

    sixth_stage = Stage()
    sixth_stage.name = 'Running the MDFF simulation with NAMD'
    task6 = Task()
    task6.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task6.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task6_tcl_cmds = ['package require mdff']
    task6_tcl_cmds += [
        'mdff setup -o adk -psf 1ake-docked-noh_autopsf.psf ' \
        + '-pdb 1ake-docked-noh_autopsf.pdb ' \
        + '-griddx 4ake-target_autopsf-grid.dx ' \
        + '-gridpdb 1ake-docked-noh_autopsf-grid.pdb ' \
        + '-extrab {1ake-extrabonds.txt 1ake-extrabonds-cispeptide.txt 1ake-extrabonds-chirality.txt} ' \
        + '-gscale 0.3 -numsteps 50000'
    ]
    # Gather every artifact the NAMD setup needs from the earlier stages.
    task6.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, second_stage.name, task2.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf-grid.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fourth_stage.name,
                                                  task4.name,
                                                  task4_output[0]),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fifth_stage.name,
                                                  task5.name,
                                                  task5_output[0]),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fifth_stage.name,
                                                  task5.name,
                                                  task5_output[1])
    ]
    set_vmd_run(task6, task6_tcl_cmds, "sixth_stage.tcl")
    sixth_stage.add_tasks(task6)
    p.add_stages(sixth_stage)

    seventh_stage = Stage()
    seventh_stage.name = "NAMD simulation"
    task7 = Task()
    task7.cpu_reqs['processes'] = int(sim_cpus) // summit_hw_thread_cnt
    task7.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task7.cpu_reqs['process_type'] = 'MPI'
    task7.cpu_reqs['thread_type'] = 'OpenMP'
    task7.pre_exec = sim_pre_exec
    task7.executable = [namd_path]
    task7.arguments = ['+ppn', summit_hw_thread_cnt, 'adk-step1.namd']
    task7.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'adk-step1.namd'),
        #'$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name, task6.name, 'adk-step2.namd'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf-grid.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-extrabonds-chirality.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-extrabonds-cispeptide.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  '1ake-extrabonds.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'mdff_template.namd'),
        #'$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name, task6.name, 'par_all27_prot_lipid_na.inp')
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'par_all36_prot.prm')
    ]
    task7.download_output_data = ['adk-step1.dcd']
    seventh_stage.add_tasks(task7)

    #task7_2 = Task()
    #task7_2.cpu_reqs['threads_per_process'] = sim_cpus
    #task7_2.executable = [ 'namd2' ]
    #task7_2.arguments = ['+ppn', sim_cpus, 'adk-step2.namd']
    #seventh_stage.add_tasks(task7_2)
    p.add_stages(seventh_stage)

    # Visualizing the MDFF trajectory
    #
    # mol new 4ake-target_autopsf.psf
    # mol addfile 4ake-target_autopsf.pdb
    # mol new 1ake-docked-noh_autopsf.psf
    # mol addfile 1ake-docked-noh_autopsf-docked.pdb
    # mol addfile adk-step1.dcd
    # mol addfile adk-step2.dcd

    eighth_stage = Stage()
    eighth_stage.name = 'Calculating the root mean square deviation'
    task8 = Task()
    task8.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task8.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task8_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile adk-step1.dcd waitfor all',
        'mol new 4ake-target_autopsf.pdb', 'package require mdff',
        'mdff check -rmsd -refpdb 4ake-target_autopsf.pdb'
    ]
    task8.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name,
                                                  '4ake-target_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]
    set_vmd_run(task8, task8_tcl_cmds, "eighth_stage.tcl")
    eighth_stage.add_tasks(task8)
    p.add_stages(eighth_stage)

    ninth_stage = Stage()
    ninth_stage.name = 'Calculating the root mean square deviation for backbone atoms'
    task9 = Task()
    task9.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task9.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task9_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile adk-step1.dcd waitfor all',
        'mol new 4ake-target_autopsf.pdb', 'package require mdff',
        'set selbb [atomselect 0 "backbone"]',
        'set selbbref [atomselect 1 "backbone"]', '$selbb frame 0',
        'measure rmsd $selbb $selbbref', '$selbb frame last',
        'measure rmsd $selbb $selbbref'
    ]
    task9.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name,
                                                  '4ake-target_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]
    set_vmd_run(task9, task9_tcl_cmds, "ninth_stage.tcl")
    ninth_stage.add_tasks(task9)
    p.add_stages(ninth_stage)

    tenth_stage = Stage()
    tenth_stage.name = 'Calculating the cross-correlation coefficient'
    task10 = Task()
    task10.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task10.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task10_tcl_cmds = ['mol new 1ake-docked-noh_autopsf.psf']
    task10_tcl_cmds += ['mol addfile adk-step1.dcd waitfor all'
                        ]  # load the full mdff trajectory
    #task10_tcl_cmds += [ 'mol new 4ake-target_autopsf.stius' ]  # load target EM density
    task10_tcl_cmds += [
        'package require mdff', 'set selall [atomselect 0 "all"]',
        '$selall frame 0', 'mdff ccc $selall -i target-density-5A.dx -res 5',
        '$selall frame last',
        'mdff ccc $selall -i target-density-5A.dx -res 5'
    ]
    task10.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, second_stage.name, task2.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]
    task10.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]
    set_vmd_run(task10, task10_tcl_cmds, "tenth_stage.tcl")
    tenth_stage.add_tasks(task10)
    p.add_stages(tenth_stage)
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.

    Builds a two-stage pipeline (one MD task per replica followed by a
    temperature-exchange task) for cycle ``Cycles``, seeding each replica's
    restart coordinates from the partner recorded in ``exchangePairs.dat``.
    Appends this cycle's task-path dict to the module-level ``Book`` list.
    Returns the assembled Pipeline.
    """
    with open("exchangePairs.dat", "r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            # Second column holds the partner replica index.
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create initial MD stage

    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  #MD Engine, Blue Waters
        # BUG FIX: previous-cycle lookups used ``Book[Cycle-1]`` — ``Cycle``
        # is this function, so that raised a TypeError.  The cycle index
        # parameter ``Cycles`` is the intended subscript (the sibling
        # ``cycle(k)`` uses ``Book[k-1]`` the same way).
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
                                  '%s/prmtop' % (Book[Cycles - 1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
                                  '%s/mdin' % (Book[Cycles - 1][r])]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r), '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # BUG FIX: was ``p.uid`` — ``p`` is undefined here; this function's
        # pipeline is ``q``.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # BUG FIX: was ``d[n1]`` — the bookkeeping dict in this function is
        # ``md_dict`` (``d`` belongs to the sibling ``cycle``).
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
    #print d
    #print Book
    return q


#p = InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod)
#q = Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod)
#return (p, q)
def cycle(k):
    """Build pipeline ``k`` (k >= 1) of the temperature-exchange workflow.

    Reads ``exchangePairs.dat`` (downloaded by the previous cycle's exchange
    task) to decide which replica's restart file seeds which MD task, then
    creates one MD task per replica followed by a single exchange task.

    Relies on module-level state: ``Replicas``, ``Replica_Cores`` and the
    bookkeeping list ``Book`` (one dict of task data paths per finished
    cycle); appends this cycle's dict to ``Book`` before returning the
    assembled Pipeline.
    """

    #read exchangePairs.dat
    #
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            # Second column holds the partner replica index.
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    #Create initial MD stage

    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        # Restart coordinates come from the exchange partner chosen in the
        # previous cycle; topology and MD input come from this replica.
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[k-1][ExchangeArray[n0]]),
                                  '%s/prmtop'%(Book[k-1][n0]),
                                  #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
                                  '%s/mdin'%(Book[k-1][n0])]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  #Preexec, BLue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # Remember the data path of each MD task so the exchange task (and
        # the next cycle, via Book) can link its outputs.
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, md_stg.uid, md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage

    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]
        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(d[n1],n1)]
    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print d
    #print Book
    return p
def create_workflow(Kconfig, args):
    """Build the EnTK Pipeline for one OpenMM MD + analysis iteration.

    Variant of the workflow builder that runs the analysis tasks inside the
    simulation stage (GPU-resident analysis) instead of a separate stage.
    Reads run settings from ``Kconfig``; ``args.Kconfig`` supplies the path
    of the configuration file, of which only the basename is staged.

    Side effect: ``Kconfig.start_iter`` is advanced past the scheduled
    iteration.  Returns the assembled ``Pipeline``.
    """

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #assumed of iteration non zero that files are in combined_path
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]
    # Optional config entries with defaults.
    # BUG FIX: these were bare ``except:`` clauses, which also swallow
    # SystemExit/KeyboardInterrupt; narrowed to Exception.  (Kconfig's exact
    # type is not visible here, so the missing-attribute exception class
    # cannot be pinned tighter — TODO confirm and narrow further.)
    try:
        systemxml = str(Kconfig.systemxml)
    except Exception:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except Exception:
        integratorxml = 'integrator-5.xml'
    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # Resume support: advance past every iteration whose full set of input
    # PDBs already exists in combined_path.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    #if cur_iter==0:
    #    pre_proc_stage2 = Stage()
    #    pre_proc_task2 = Task()
    #    pre_proc_task2.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
    #    pre_proc_task2.executable = ['ls']
    #    pre_proc_task2.arguments = ['-l']
    #    pre_proc_task2.copy_input_data = ['$SHARED/%s > %s/%s' % (config_file,combined_path, config_file),
    #                                      '$SHARED/%s > %s/%s' % (script_ana,combined_path,script_ana),
    #                                      '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file),
    #                                      '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference)]
    #                                      # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
    #    pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
    #    pre_proc_stage2.add_tasks(pre_proc_task2)
    #    wf.add_stages(pre_proc_stage2)
    # ------------------------------------------------------------------------------------------------------------------

    start_iter = cur_iter
    # The second condition caps the loop at start_iter + 1, so exactly one
    # iteration is scheduled per call.
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #    Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #              previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #              are input from pre_loop. There arei 'numCUs' number of instances of gromacs per iteration.
        #    Arguments :
        #              grompp = gromacs parameters filename
        #              topol  = topology filename
        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        while (num_allocated_rep < num_replicas):
            # Spread the replicas still unassigned evenly over the parallel
            # slots still unused.
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']
            pre_exec_arr = md_settings + [
                'export tasks=md', 'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #    pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 10,
                'thread_type': 'OpenMP'
            }
            # One task simulates the replica index range [idxstart, idxend).
            sim_task.arguments = [
                'run_openmm.py', '--Kconfig', config_file, '--idxstart',
                str(num_allocated_rep), '--idxend',
                str(num_allocated_rep + use_replicas), '--path',
                combined_path, '>', 'md.log'
            ]
            #'--trajstride', str(Kconfig.trajstride),'--Kconfig', str(args.Kconfig),
            #'--idxstart',str(num_allocated_rep), '--idxend',str((num_allocated_rep+use_replicas)),
            #'--path',combined_path,'--iter',str(cur_iter),
            #'--md_steps',str(Kconfig.md_steps), '--save_traj', 'True','>', 'md.log']
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            copy_arr = []
            if cur_iter == 0:
                # Seed every replica of iteration 0 from the shared input PDB.
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #    copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                        ]

            #if cur_iter==0 and num_allocated_rep==0:
            #    copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #    for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #        #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #        copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        # Analysis runs inside the same stage (one GPU task) unless the
        # strategy only extends trajectories.
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                pre_exec_arr = ana_settings
                ana_task.pre_exec = pre_exec_arr
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 10,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                    wf.uid, sim_stage.uid, ana_task.uid)
                sim_stage.add_tasks(ana_task)

        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def generate_pipeline(cfg):
    """Build the EnTK pipeline for one S1-RAPTOR campaign.

    For each run listed in the campaign's run file, a per-run JSON config is
    written to ``configs/wf0.<name>.cfg`` and ``n_masters`` master tasks are
    created in a single stage of a single pipeline.

    :param cfg: dict with keys ``run_cfg_file`` (resource/workload config
                path) and ``run_file`` (runs for this campaign)
    :return:    the configured Pipeline, or None when there is nothing to run
    """

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']      # runs for this campaign

    # setup S1 workload
    # NOTE(review): the `cfg` parameter (a plain dict) is deliberately
    # shadowed here by a radical.utils Config object; all uses below refer
    # to the Config, not to the caller's dict.
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1-RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    # remote results directory, listed once so per-run skip logic (currently
    # commented out below) can consult it
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, nodes, runtime in runs:

        print('%30s %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)

        # rec = False
        # if tgt in ls:
        #     if workload.recompute:
        #         rec += 1
        #         d.move(tgt, tgt + '.bak')
        #     else:
        #         print('skip 1 %s' % name)
        #         continue
        # if smiles in ls:
        #     if smiles not in subs:
        #         subs[smiles] = [str(u).split('/')[-1] for u in d.list('%s/*' % smiles)]
        #     if tgt in subs[smiles]:
        #         if workload.recompute:
        #             rec += 2
        #             d.move('%s/%s' % (smiles, tgt),
        #                    '%s/%s.bak' % (smiles, tgt))
        #         else:
        #             print('skip 2 %s' % name)
        #             continue
        ## if os.path.exists('results/%s.%s.gz' % (name, wofkload.output)):
        ##     print('skip 3 %s' % name)
        ##     continue
        #if rec: print('recompute %d %s' % (rec, name))
        #else : print('compute 2 %s' % name)

        cpn = cfg.cpn
        gpn = cfg.gpn
        n_masters = cfg.n_masters

        # specialize the shared Config for this run and persist it; the
        # master tasks pick it up as 'wf0.cfg' via upload_input_data
        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.nodes = nodes
        cfg.runtime = runtime
        # one node is reserved (presumably for the master itself) —
        # TODO confirm against the master/worker layout
        cfg.n_workers = int(nodes / n_masters - 1)
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()
            # activate the site-specific virtualenv before running the master
            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]
            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_threads = cpn
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            t.download_output_data = [
                '%s.%s.gz > results/%s.%s.gz' %
                (name, workload.output, name, workload.output)
            ]
            # t.input_staging = [{'source': 'wf0_master.py',
            #                     'target': 'wf0_master.py',
            #                     'action': rp.TRANSFER,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                    {'source': 'wf0_worker.py',
            #                     'target': 'wf0_worker.py',
            #                     'action': rp.TRANSFER,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                    {'source': 'configs/wf0.%s.cfg' % name,
            #                     'target': 'wf0.cfg',
            #                     'action': rp.TRANSFER,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                    {'source': workload.input_dir,
            #                     'target': 'input_dir',
            #                     'action': rp.LINK,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                    {'source': workload.impress_dir,
            #                     'target': 'impress_md',
            #                     'action': rp.LINK,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                    {'source': 'read_ligand_dict.py',
            #                     'target': 'read_ligand_dict.py',
            #                     'action': rp.TRANSFER,
            #                     'flags' : rp.DEFAULT_FLAGS},
            #                   ]
            # t.output_staging = [{'source': '%s.%s.gz' % (name, workload.output),
            #                      'target': 'results/%s.%s.gz' % (name, workload.output),
            #                      'action': rp.TRANSFER,
            #                      'flags' : rp.DEFAULT_FLAGS}]
            s.add_tasks(t)

    p.add_stages(s)

    return p
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values.

    :param s: a string value
    :param l: a list value
    :param i: an int value
    :param b: a bool value
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        # string-typed attributes must reject any non-string value
        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        # list-typed attributes must reject any non-list value
        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        # process_type/thread_type must be str or None
        # BUGFIX: the original guard was
        #   `if not isinstance(data, str) and not isinstance(data, unicode):`
        # but the `unicode` builtin does not exist in Python 3 (str covers
        # all text there), so the extra check raised NameError.  Testing
        # against `str` alone is sufficient.
        if not isinstance(data, str):
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        # processes/threads_per_process must be int
        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
def cycle(k):
    """Build one replica-exchange MD cycle as an EnTK pipeline.

    The cycle consists of one MD stage with ``Replicas`` sander.MPI tasks
    (each restarted from the exchange partner chosen in the previous cycle's
    ``exchangePairs.dat``) followed by one exchange stage that computes the
    pairing for the next cycle.

    :param k: 1-based cycle index; ``Book[k-1]`` holds the task references
              of the previous cycle (uses module-level ``Book``, ``Replicas``
              and ``Replica_Cores``).
    :return:  the configured Pipeline
    """

    # read exchangePairs.dat
    # BUGFIX: the original code had the `with open(...)` line commented out,
    # so `for line in f:` referenced an undefined name `f` and raised
    # NameError.  The file handle is now opened (and closed) properly.
    ExchangeArray = []
    with open("exchangePairs.dat", "r") as f:
        for line in f:
            # second column holds the partner replica index
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    p = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    # Create initial MD stage
    md_stg = Stage()

    # Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']

        # restart coordinates come from this replica's exchange PARTNER,
        # topology and input from the replica itself
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
            '%s/prmtop' % (Book[k - 1][n0]),
            #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
            '%s/mdin' % (Book[k - 1][n0])
        ]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct

        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'
                           ]  #Preexec, Blue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o',
            'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # remember this task's sandbox for the exchange stage / next cycle
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)
        #print d

        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    # Create exchange stage
    ex_stg = Stage()

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]

    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # the pairing file drives the *next* call of cycle()
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)

    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print d
    #print Book

    return p
def create_workflow(Kconfig):
    """Build the iterative ExTASY sim/analysis workflow as an EnTK pipeline.

    Layout per iteration (while cur_iter < Kconfig.num_iterations):
      pre_proc (iteration 0 / restart only) -> sim (OpenMM replicas) ->
      pre_ana (concatenate outputs) -> ana (LSDMap) -> post_ana (reweighting
      and selection of next-iteration start structures).

    :param Kconfig: configuration object (start_iter, num_iterations,
                    num_replicas, file paths, machine settings, ...)
    :return:        the configured Pipeline
    """

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #assumed of iteration non zero that files are in combined_path
    combined_path = str(Kconfig.remote_output_directory
                        )  #'/u/sciteam/hruska/scratch/extasy-grlsd'
    # restart_iter marks the iteration whose sim stage reads start structures
    # from pre_proc (fresh/restart) rather than from the previous post_ana
    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        # fresh run: split (and clone) the initial structure into replicas
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro', '--clone',
            str(Kconfig.num_replicas)
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path,
             cur_iter),
            '$SHARED/%s > input.gro' %
            os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------
    else:
        # restart: take the previous iteration's output from combined_path
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro'
        ]
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            sim_task = Task()
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter
                ]
                sim_task.cores = int(
                    Kconfig.num_CUs_per_MD_replica
                )  #on bluewaters tasks on one node are executed concurently
            else:
                sim_task.executable = ['python']
                # NOTE(review): 'module unload darshan, xalt' (with comma)
                # differs from the later stages' 'module unload darshan xalt'
                # — confirm which form the site's module system expects.
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
                    'module add bwpy-mpi', 'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake', 'module unload darshan, xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md', 'export iter=%s' % cur_iter,
                    'export OMP_NUM_THREADS=1'
                ]
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                # NOTE(review): cpu_reqs of 0 processes/threads — presumably
                # "GPU only"; confirm EnTK accepts zero here.
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None
                }
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log'
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]

            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            # first (restart) iteration reads start structures from pre_proc;
            # later iterations read them from the previous post_ana task
            if restart_iter == cur_iter:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulaxftion stage is a small coordinate file.
        #                Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
            'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan, xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana', 'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']

        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]

        # pull every replica's out.gro into the concatenation task
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)
        ]
        #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu', 'module unload bwpy',
            'module load bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap', 'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        ana_task.executable = ['lsdmap']  #/u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)
        ]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)
        ]

        # reuse the previous iteration's weights when available
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1)
            ]
        # download logs only every nsave-th iteration
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]
        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_replicas              = number of configurations to be generated in the new coordinate file
        #             out                       = output filename
        #             cycle                     = iteration number
        #             max_dead_neighbors        = max dead neighbors to be considered
        #             max_alive_neighbors       = max alive neighbors to be considered
        #             numCUs                    = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        # NOTE(review): writes the private _name attribute directly,
        # bypassing the Task.name setter — confirm intended.
        post_ana_task._name = 'post_ana_task'
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module add bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
        post_ana_task.executable = ['python']
        post_ana_task.arguments = [
            'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev', 'ncopies.nc',
            'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
            Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
            'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
            'weight_out.w', 'tmpha.eg'
        ]

        post_ana_task.link_input_data = [
            '$SHARED/post_analyze.py > post_analyze.py',
            '$SHARED/selection.py > selection.py',
            '$SHARED/selection-cluster.py > selection-cluster.py',
            '$SHARED/reweighting.py > reweighting.py',
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
            '%s/iter_%s/weight_out.w > weight.w' %
            (combined_path, cur_iter - 1),
            '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
            '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
            '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
            '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)
        ]

        if (cur_iter % Kconfig.nsave == 0):
            post_ana_task.download_output_data = [
                'out.gro > output/iter_%s/out.gro' % cur_iter,
                'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                % (cur_iter),
                'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                (combined_path, cur_iter, cur_iter)
            ]

        post_ana_task.copy_output_data = [
            'ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter),
            'weight_out.w > %s/iter_%s/weight_out.w' %
            (combined_path, cur_iter),
            'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
            'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png'
            % (combined_path, cur_iter),
            'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png'
            % (combined_path, cur_iter),
            'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png'
            % (combined_path, cur_iter),
            'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png'
            % cur_iter,
            'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png'
            % cur_iter,
            'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png'
            % cur_iter
        ]

        # referenced by the NEXT iteration's sim stage for start structures
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def generate_pipeline(name, stages):
    """Generate the pipeline of prediction and blob detection.

    Stage 0 runs the 'Predictor' classification task; every subsequent
    stage runs a 'Blob_detector' task over the images classified in
    stage 0.

    :param name:   name for the Pipeline
    :param stages: total number of stages to create
    :return:       the configured Pipeline
    """

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):
        if (s_cnt == 0):
            # Create a Stage object
            s0 = Stage()
            s0.name = 'Stage %s' % s_cnt
            # Create Task 1, training
            t1 = Task()
            t1.name = 'Predictor'
            t1.pre_exec = [
                'module load psc_path/1.1', 'module load slurm/default',
                'module load intel/17.4', 'module load python3',
                'module load cuda', 'mkdir -p classified_images/crabeater',
                'mkdir -p classified_images/weddel',
                'mkdir -p classified_images/pack-ice',
                'mkdir -p classified_images/other',
                'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate'
            ]
            t1.executable = 'python3'  # Assign executable to the task
            # Assign arguments for the task executable
            t1.arguments = [
                'pt_predict.py', '-class_names', 'crabeater', 'weddel',
                'pack-ice', 'other'
            ]
            t1.link_input_data = [
                '/pylon5/mc3bggp/paraskev/seal_test/nn_model.pth.tar',
                '/pylon5/mc3bggp/paraskev/nn_images',
                '/pylon5/mc3bggp/paraskev/seal_test/test_images'
            ]
            t1.upload_input_data = ['pt_predict.py', 'sealnet_nas_scalable.py']
            t1.cpu_reqs = {
                'processes': 1,
                'threads_per_process': 1,
                'thread_type': 'OpenMP'
            }
            t1.gpu_reqs = {
                'processes': 1,
                'threads_per_process': 1,
                'thread_type': 'OpenMP'
            }
            s0.add_tasks(t1)
            # Add Stage to the Pipeline
            p.add_stages(s0)
        else:
            # Create a Stage object
            s1 = Stage()
            s1.name = 'Stage %s' % s_cnt
            # Create Task 2,
            t2 = Task()
            t2.pre_exec = [
                'module load psc_path/1.1', 'module load slurm/default',
                'module load intel/17.4', 'module load python3',
                'module load cuda', 'module load opencv',
                'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate',
                'mkdir -p blob_detected'
            ]
            t2.name = 'Blob_detector'
            # CONSISTENCY FIX: use the plain-string executable form, matching
            # t1 above (the original used the deprecated list form
            # ['python3']; both resolve to the same command).
            t2.executable = 'python3'  # Assign executable to the task
            # Assign arguments for the task executable
            t2.arguments = ['blob_detector.py']
            t2.upload_input_data = ['blob_detector.py']
            # read the classified images produced by the Predictor task
            t2.link_input_data = [
                '$Pipeline_%s_Stage_%s_Task_%s/classified_images' %
                (p.uid, s0.uid, t1.uid)
            ]
            t2.download_output_data = ['blob_detected/'
                                       ]  #Download resuting images
            t2.cpu_reqs = {
                'processes': 1,
                'threads_per_process': 1,
                'thread_type': 'OpenMP'
            }
            t2.gpu_reqs = {
                'processes': 1,
                'threads_per_process': 1,
                'thread_type': 'OpenMP'
            }
            s1.add_tasks(t2)
            # Add Stage to the Pipeline
            p.add_stages(s1)

    return p