def test_issue_199():
    """Run the pipeline built by ``generate_pipeline()`` on localhost.

    The test succeeds when ``AppManager.run()`` returns without raising.
    """
    # Export the database URL before the Application Manager is created.
    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    amgr = AppManager(hostname=hostname, port=port)

    # 'local.localhost' requests execution on the local machine.
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=10,
                              cpus=1)

    # The workflow is a list holding the single generated pipeline.
    pipeline = generate_pipeline()
    amgr.workflow = [pipeline]

    amgr.run()
def test_amgr_terminate():
    """Call ``Amgr.terminate()`` on a manager with a hand-built task manager."""
    from radical.entk.execman.rp import TaskManager

    amgr = Amgr(rts='radical.pilot', hostname=hostname, port=port)
    amgr.resource_desc = dict(resource='xsede.supermic',
                              walltime=30,
                              cpus=20,
                              project='TG-MCB090174')

    # Set up the message queues and mark them for cleanup.
    amgr._setup_mqs()
    amgr._rmq_cleanup = True

    # Attach a task manager with empty queues, wired to the manager's
    # own message-queue host, resource manager and port.
    tmgr_kwargs = {
        'sid': 'test',
        'pending_queue': [],
        'completed_queue': [],
        'mq_hostname': amgr._mq_hostname,
        'rmgr': amgr._resource_manager,
        'port': amgr._port,
    }
    amgr._task_manager = TaskManager(**tmgr_kwargs)

    amgr.terminate()
def test_issue_271():
    """Run a generated pipeline and expect every first-stage task to fail."""
    amgr = AppManager(hostname=hostname, port=port)

    # 'local.localhost' requests execution on the local machine.
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=10,
                              cpus=1)

    # The workflow is handed over as a set here (a list works as well).
    pipeline = generate_pipeline()
    amgr.workflow = {pipeline}

    amgr.run()

    first_stage = pipeline.stages[0]
    assert all(task.state == states.FAILED for task in first_stage.tasks)
def test_stage_post_exec():
    """Run the module-level pipeline ``p1`` whose stage has a ``post_exec`` hook.

    The first stage gets ``NUM_TASKS`` tasks and the module-level
    ``condition`` callable as its ``post_exec``.
    """
    # p1 is only mutated, never rebound, so no ``global`` statement is needed.
    p1.name = 'p1'

    first_stage = Stage()
    first_stage.name = 's1'
    for _ in range(NUM_TASKS):
        first_stage.add_tasks(create_single_task())
    first_stage.post_exec = condition
    p1.add_stages(first_stage)

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    amgr = AppManager(rts='radical.pilot', hostname=hostname, port=port)
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=30,
                              cpus=1)
    amgr.workflow = [p1]
    amgr.run()
def test_issue_26():
    """Check that uids keep increasing across two runs of one AppManager.

    Two identical single-task pipelines are run back to back on the same
    manager (``autoterminate=False``).  The numeric suffix of the second
    pipeline's stage uid and task uid must be exactly one larger than the
    first pipeline's.
    """

    def create_pipeline():
        """Return a pipeline with one stage holding one '/bin/echo hello' task."""
        p = Pipeline()
        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        s.add_tasks(t1)
        p.add_stages(s)
        return p

    def uid_index(uid):
        """Return the integer after the last '.' of a uid."""
        return int(uid.split('.')[-1])

    res_dict = {
        'resource': 'local.localhost',
        'walltime': 10,
        'cpus': 1,
        'project': ''
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port, autoterminate=False)
    appman.resource_desc = res_dict

    p1 = create_pipeline()
    appman.workflow = [p1]
    appman.run()
    # Parenthesised single-argument print gives the same output on
    # Python 2 and Python 3 (the bare print statement is Python 2 only).
    print('%s %s' % (p1.uid, p1.stages[0].uid))

    p2 = create_pipeline()
    appman.workflow = [p2]
    appman.run()
    print('%s %s' % (p2.uid, p2.stages[0].uid))

    # autoterminate is off, so the resources are released explicitly.
    appman.resource_terminate()

    assert uid_index(p1.stages[0].uid) + 1 == uid_index(p2.stages[0].uid)

    for t in p1.stages[0].tasks:
        for tt in p2.stages[0].tasks:
            assert uid_index(t.uid) + 1 == uid_index(tt.uid)
def test_state_order():
    """
    **Purpose**: Verify that the Pipeline, its Stage and the Tasks record
    their states in the expected order.
    """
    expected_pipeline = ['DESCRIBED', 'SCHEDULING', 'DONE']
    expected_stage = ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'DONE']
    expected_task = ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'SUBMITTING',
                     'SUBMITTED', 'EXECUTED', 'DEQUEUEING', 'DEQUEUED',
                     'DONE']

    def make_task():
        """Return a '/bin/date' task with no data staging."""
        task = Task()
        task.name = 'simulation'
        task.executable = ['/bin/date']
        task.copy_input_data = []
        task.copy_output_data = []
        return task

    pipeline = Pipeline()
    pipeline.name = 'p1'

    stage = Stage()
    stage.name = 's1'
    # One task through the ``tasks`` setter, a second one through add_tasks().
    stage.tasks = make_task()
    stage.add_tasks(make_task())
    pipeline.add_stages(stage)

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'

    amgr = Amgr(hostname=hostname, port=port)
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=5,
                              cpus=1,
                              project='')
    amgr.workflow = [pipeline]
    amgr.run()

    assert pipeline.state_history == expected_pipeline

    executed_stage = pipeline.stages[0]
    assert executed_stage.state_history == expected_stage

    for task in executed_stage.tasks:
        assert task.state_history == expected_task
def test_amgr_resource_description_assignment():
    """Assigning ``resource_desc`` must create the RTS-specific ResourceManager.

    Checked for both the 'radical.pilot' and the 'mock' runtime systems.
    """
    description = dict(resource='xsede.supermic',
                       walltime=30,
                       cpus=1000,
                       project='TG-MCB090174')

    # radical.pilot backend
    rp_amgr = Amgr(rts='radical.pilot')
    rp_amgr.resource_desc = description

    from radical.entk.execman.rp import ResourceManager as RPResourceManager
    assert isinstance(rp_amgr._resource_manager, RPResourceManager)

    # mock backend
    mock_amgr = Amgr(rts='mock')
    mock_amgr.resource_desc = description

    from radical.entk.execman.mock import ResourceManager as MockResourceManager
    assert isinstance(mock_amgr._resource_manager, MockResourceManager)
def test_issue_236():
    '''
    Create folder temp to transfer as input to task:
    .
    ./temp
    ./temp/dir1
    ./temp/dir1/file2.txt
    ./temp/file1.txt

    After the run the same tree is expected under /tmp/temp.  Both trees
    are removed again, also when the run or an assertion fails, so that a
    failed run does not break the next one.
    '''
    src_root = '%s/temp' % cur_dir

    # makedirs creates the intermediate 'temp' directory as well.
    os.makedirs('%s/dir1' % src_root)

    try:
        # Same content as `echo "Hello world" > file`, without a shell.
        for rel_path in ('file1.txt', 'dir1/file2.txt'):
            with open('%s/%s' % (src_root, rel_path), 'w') as fh:
                fh.write('Hello world\n')

        # 'local.localhost' requests execution on the local machine.
        res_dict = {
            'resource': 'local.localhost',
            'walltime': 10,
            'cpus': 1
        }

        os.environ['RADICAL_PILOT_DBURL'] = MLAB

        # Create Application Manager
        appman = AppManager(hostname=hostname, port=port)

        # Assign the resource description to the Application Manager
        appman.resource_desc = res_dict

        p = generate_pipeline()

        # Assign the workflow as a list of Pipelines
        appman.workflow = [p]

        # Run the Application Manager
        appman.run()

        # Assert folder movement
        assert len(glob('/tmp/temp*')) >= 1
        assert len(glob('/tmp/temp/dir*')) == 1
        assert len(glob('/tmp/temp/*.txt')) == 1
        assert len(glob('/tmp/temp/dir1/*.txt')) == 1

    finally:
        # ignore_errors: a missing tree must not mask the real failure.
        shutil.rmtree(src_root, ignore_errors=True)
        shutil.rmtree('/tmp/temp', ignore_errors=True)
def test_issue_255():
    """Stop and restart the task manager after a completed run (issue 255)."""

    def build_pipeline():
        """Return a pipeline with one stage holding one 'sleep 10' task."""
        task = Task()
        task.name = 'simulation'
        task.executable = ['sleep']
        task.arguments = ['10']

        stage = Stage()
        stage.add_tasks(task)

        pipeline = Pipeline()
        pipeline.add_stages(stage)
        return pipeline

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    # autoterminate=False keeps the manager usable after run() returns.
    amgr = AppManager(hostname=hostname, port=port, autoterminate=False)
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=5,
                              cpus=1,
                              project='')

    first = build_pipeline()
    amgr.workflow = [first]
    amgr.run()

    # Disabled in the original test:
    # p1 = create_pipeline()
    # appman.workflow = [p1]
    # appman.run()
    # appman.resource_terminate()

    task_manager = amgr._task_manager
    task_manager.terminate_manager()
    # task_manager.terminate_heartbeat()
    task_manager.start_manager()
def test_shared_data():
    """Stage two shared files and check the workflow's ``output.txt``.

    ``file1.txt`` ("Hello") and ``file2.txt`` ("World") are created in
    ``cur_dir`` and registered as shared data.  After the run,
    ``output.txt`` in ``cur_dir`` must contain exactly those two lines.
    All three files are removed afterwards, also when the run or the
    assertion fails.
    """
    # Start from a clean slate: drop leftovers of earlier runs.
    for f in glob('%s/file*.txt' % cur_dir):
        os.remove(f)

    file1 = '%s/file1.txt' % cur_dir
    file2 = '%s/file2.txt' % cur_dir
    output = '%s/output.txt' % cur_dir

    try:
        # Same content as `echo "..." > file`, without a shell.
        for path, content in ((file1, 'Hello\n'), (file2, 'World\n')):
            with open(path, 'w') as fh:
                fh.write(content)

        # 'local.localhost' requests execution on the local machine.
        res_dict = {
            'resource': 'local.localhost',
            'walltime': 1,
            'cpus': 1
        }

        os.environ['RADICAL_PILOT_DBURL'] = MLAB

        # Create Application Manager
        appman = AppManager(hostname=hostname, port=port)

        # Assign the resource description and the shared input files
        appman.resource_desc = res_dict
        appman.shared_data = [file1, file2]

        p = generate_pipeline()

        # Assign the workflow as a list of Pipelines
        appman.workflow = [p]

        # Run the Application Manager
        appman.run()

        with open(output, 'r') as fp:
            assert [d.strip() for d in fp.readlines()] == ['Hello', 'World']

    finally:
        for path in (file1, file2, output):
            if os.path.exists(path):
                os.remove(path)
def test_integration_local():
    """
    **Purpose**: Run a two-task, single-stage EnTK application on localhost
    """

    def make_task():
        """Return a '/bin/echo hello' task with no data staging."""
        task = Task()
        task.name = 'simulation'
        task.executable = ['/bin/echo']
        task.arguments = ['hello']
        task.copy_input_data = []
        task.copy_output_data = []
        return task

    pipeline = Pipeline()
    pipeline.name = 'p1'

    stage = Stage()
    stage.name = 's1'
    # One task through the ``tasks`` setter, a second one through add_tasks().
    stage.tasks = make_task()
    stage.add_tasks(make_task())
    pipeline.add_stages(stage)

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    amgr = AppManager(hostname=hostname, port=port)
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=5,
                              cpus=1,
                              project='')
    amgr.workflow = [pipeline]
    amgr.run()
def test_amgr_assign_shared_data(s,i,b,se):
    """``shared_data`` must reject the four arguments and accept a list of names.

    Each of ``s``, ``i``, ``b`` and ``se`` has to raise ``TypeError`` when
    assigned; a list of file names must reach the resource manager.
    """
    amgr = Amgr(rts='radical.pilot', hostname=hostname, port=port)
    amgr.resource_desc = dict(resource='xsede.supermic',
                              walltime=30,
                              cpus=20,
                              project='TG-MCB090174')

    for rejected in (s, i, b, se):
        with pytest.raises(TypeError):
            amgr.shared_data = rejected

    expected = ['file1.txt','file2.txt']
    # Assign a separate list so the comparison is by value, not identity.
    amgr.shared_data = list(expected)
    assert amgr._resource_manager.shared_data == expected
def test_diff_rmq():
    """Run a one-task pipeline against the RabbitMQ server at ``hostname:port``.

    The test succeeds when ``AppManager.run()`` returns without raising.
    """

    def create_pipeline():
        """Return a pipeline with one stage holding one '/bin/echo hello' task."""
        p = Pipeline()
        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        s.add_tasks(t1)
        p.add_stages(s)
        return p

    res_dict = {
        'resource': 'local.localhost',
        'walltime': 5,
        'cpus': 1,
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict

    p1 = create_pipeline()
    # Parenthesised single-argument print gives the same output on
    # Python 2 and Python 3 (the bare print statement is Python 2 only).
    print('%s %s' % (p1.uid, p1.stages[0].uid))

    appman.workflow = [p1]
    appman.run()
def test_amgr_run_mock():
    """Run a single '/bin/date' task through the 'mock' runtime system."""
    task = Task()
    task.name = 'simulation'
    task.executable = ['/bin/date']

    stage = Stage()
    stage.tasks = task

    pipeline = Pipeline()
    pipeline.add_stages(stage)

    amgr = Amgr(hostname=hostname, port=port, rts="mock")
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=5,
                              cpus=1,
                              project='')
    amgr.workflow = [pipeline]
    amgr.run()
} # Add Stage to the Pipeline p.add_stages(s1) return p if __name__ == '__main__': # Create a dictionary describe four mandatory keys: # resource, walltime, cores and project # resource is 'local.localhost' to execute locally res_dict = { 'resource': 'local.localhost', 'walltime': 15, 'cpus': 2, } # Create Application Manager appman = AppManager() appman.resource_desc = res_dict p = generate_pipeline() # Assign the workflow as a set of Pipelines to the Application Manager appman.workflow = [p] # Run the Application Manager appman.run()
def test_rp_da_scheduler_bw():
    """
    **Purpose**: Run a two-stage EnTK application on 'ncsa.bw_aprun' and
    compare the hostnames reported by both stages.

    Stage 's1' runs ``n`` named tasks that write their hostname to a file.
    Stage 's2' runs ``n`` tasks tagged with the names of the 's1' tasks.
    For every index the downloaded hostname files of both stages must match.
    """

    def first_line(path):
        """Return the stripped first line of *path*, closing the file."""
        with open(path, 'r') as fh:
            return fh.readline().strip()

    p1 = Pipeline()
    p1.name = 'p1'

    n = 10

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x
        t.executable = ['/bin/hostname']
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 10
        t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt'%(x)]
        s1.add_tasks(t)
    p1.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['/bin/hostname']
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt'%(x)]
        # The tag carries the name of the matching 's1' task.
        t.tag = 't%s'%x
        s2.add_tasks(t)
    p1.add_stages(s2)

    res_dict = {
        'resource' : 'ncsa.bw_aprun',
        'walltime' : 10,
        'cpus'     : 128,
        'project'  : 'gk4',
        'queue'    : 'high'
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()

    try:
        for i in range(n):
            assert first_line('s1_t%s_hostname.txt'%i) == \
                   first_line('s2_t%s_hostname.txt'%i)
    finally:
        # Remove the downloaded files even when the comparison fails.
        # NOTE: this deletes every *.txt file in the working directory.
        for f in glob('%s/*.txt' % os.getcwd()):
            os.remove(f)
def test_stage_post_exec():
    """Grow a pipeline at runtime through ``Stage.post_exec`` callbacks.

    The first stage holds 2 tasks.  Its ``post_exec`` hook appends a new
    stage with twice as many tasks, and so does every appended stage,
    until the stage counter reaches 5.
    """
    max_stages = 5
    stage_no = 1
    task_count = 2

    pipeline = Pipeline()
    pipeline.name = 'p1'

    def make_task():
        """Return a '/bin/echo hello' task with no data staging."""
        task = Task()
        task.name = 'simulation'
        task.executable = '/bin/echo'
        task.arguments = ['hello']
        task.copy_input_data = []
        task.copy_output_data = []
        return task

    def attach_stage(stage, count):
        """Fill *stage* with *count* tasks, hook it and add it to the pipeline."""
        for _ in range(count):
            stage.add_tasks(make_task())
        stage.post_exec = after_stage
        pipeline.add_stages(stage)

    def after_stage():
        """post_exec hook: append a stage twice as large while below the limit."""
        nonlocal stage_no, task_count
        if stage_no >= max_stages:
            # Limit reached: nothing is appended.
            return
        stage_no += 1
        task_count *= 2
        follow_up = Stage()
        follow_up.name = 's%s' % stage_no
        attach_stage(follow_up, task_count)

    first_stage = Stage()
    first_stage.name = 's1'
    attach_stage(first_stage, task_count)

    amgr = AppManager(rts='radical.pilot', hostname=hostname, port=port,
                      username=username, password=password)
    amgr.resource_desc = dict(resource='local.localhost',
                              walltime=30,
                              cpus=1)
    amgr.workflow = [pipeline]
    amgr.run()
def test_rp_da_scheduler_bw():
    """
    **Purpose**: Run a two-stage EnTK application on 'ncsa.bw_aprun' and
    compare the hostnames reported by both stages.

    Stage 's1' runs ``n`` named tasks that write their hostname to a file.
    Stage 's2' runs ``n`` tasks tagged with the names of the 's1' tasks.
    For every index the downloaded hostname files of both stages must match.
    """

    def first_line(path):
        """Return the stripped first line of *path*, closing the file."""
        with open(path, 'r') as fh:
            return fh.readline().strip()

    p1 = Pipeline()
    p1.name = 'p1'

    n = 10

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x
        t.executable = '/bin/hostname'
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 10
        t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt'%(x)]
        s1.add_tasks(t)
    p1.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = '/bin/hostname'
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt'%(x)]
        # The tag carries the name of the matching 's1' task.
        t.tag = 't%s'%x
        s2.add_tasks(t)
    p1.add_stages(s2)

    res_dict = {
        'resource' : 'ncsa.bw_aprun',
        'walltime' : 10,
        'cpus'     : 128,
        'project'  : 'gk4',
        'queue'    : 'high'
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()

    try:
        for i in range(n):
            assert first_line('s1_t%s_hostname.txt'%i) == \
                   first_line('s2_t%s_hostname.txt'%i)
    finally:
        # Remove the downloaded files even when the comparison fails.
        # NOTE: this deletes every *.txt file in the working directory.
        for f in glob('%s/*.txt' % os.getcwd()):
            os.remove(f)
def workflow(cmt_filename, param_path):
    """Build the complete inversion pipeline for one CMT solution and run it.

    :param cmt_filename: str containing the path to the cmt solution that is
                         supposed to be inverted for
    :param param_path: str containing the path to the parameter directory;
                       ``Database/DatabaseParameters.yml`` and
                       ``SpecfemParams/SpecfemParams.yml`` are read from it

    Usage:
        ```bash
        python 1pipeline <path/to/cmtsolution>
        ```

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_filename)

    # Earthquake file in the database
    cmt_file_db = os.path.join(Cdir, "C" + Cid + ".cmt")

    # Create a counter for all tasks in one pipeline
    task_counter = 0

    # Create a Pipeline object
    p = Pipeline()

    if HEADNODE_AVAILABLE:
        # ---- Create Database Entry ---------------------------------------- #

        # Create Database entry stage:
        database_entry_stage, task_counter = create_entry(
            cmt_filename, param_path, task_counter)

        # Add Stage to the Pipeline
        p.add_stages(database_entry_stage)

        # ---- REQUEST DATA (disabled) -------------------------------------- #

        # # Request data stage
        # datarequest_stage, task_counter = data_request(cmt_file_db,
        #                                                param_path,
        #                                                task_counter)
        #
        # # Add Stage to the Pipeline
        # p.add_stages(datarequest_stage)

    else:
        # Create the entry now before running the pipeline
        task_counter = call_create_entry(cmt_filename, param_path,
                                         task_counter)

        # # Download the data from the headnode before running the pipeline
        # task_counter = call_download_data(cmt_file_db, param_path,
        #                                   task_counter)

    # ---- Write Sources (disabled) ----------------------------------------- #

    # # Create Source modification stage
    # w_sources_stage, task_counter = write_sources(cmt_file_db, param_path,
    #                                               task_counter)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(w_sources_stage)

    # ---- Run Specfem (disabled) ------------------------------------------- #

    # # Create Specfem Stage
    # runSF3D_stage, task_counter = run_specfem(cmt_file_db,
    #                                           param_path,
    #                                           task_counter)
    #
    # # Add Simulation stage to the Pipeline
    # p.add_stages(runSF3D_stage)
    #
    # # ---- Clean Up Specfem ----------------------------------------------- #
    #
    # # Create clean_up stage
    # clean_up_stage, task_counter = specfem_clean_up(cmt_file_db,
    #                                                 param_path,
    #                                                 task_counter)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(clean_up_stage)

    # ---- Convert to ASDF -------------------------------------------------- #

    # Create conversion stage
    conversion_stage, task_counter = convert_traces(cmt_file_db, param_path,
                                                    task_counter)

    # Add stage to pipeline
    p.add_stages(conversion_stage)

    # ---- Create Process Path files ---------------------------------------- #

    # Create Process Stage Pipeline
    process_path_stage, task_counter = create_process_path_files(
        cmt_file_db, param_path, task_counter)

    p.add_stages(process_path_stage)

    # ---- Process Traces --------------------------------------------------- #

    # Create processing stages (the helper returns several)
    processing_stages, task_counter = create_processing_stage(
        cmt_file_db, param_path, task_counter)
    for stage in processing_stages:
        p.add_stages(stage)

    # ---- Window Traces ---------------------------------------------------- #

    # Create windowing stages (the helper returns several)
    windowing_stages, task_counter = create_windowing_stage(
        cmt_file_db, param_path, task_counter)
    for windowing_stage in windowing_stages:
        p.add_stages(windowing_stage)

    # ---- Create Inversion Dictionaries ------------------------------------ #

    inv_dict_stage, task_counter = create_inversion_dict_stage(
        cmt_file_db, param_path, task_counter)

    p.add_stages(inv_dict_stage)

    # ---- Inversion -------------------------------------------------------- #

    # Last stage: only the stage is used, no task counter is unpacked.
    inversion_stage = create_inversion_stage(cmt_file_db, param_path,
                                             task_counter)

    p.add_stages(inversion_stage)

    # ============== RUNNING THE PIPELINE ================================== #

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Load the Specfem parameters
    specfem_specs = read_yaml_file(
        os.path.join(param_path, "SpecfemParams/SpecfemParams.yml"))

    # Walltime per simulation is given as "HH:MM:SS"; convert it to minutes.
    walltime_per_simulation = specfem_specs["walltime"].split(":")
    hours_in_min = float(walltime_per_simulation[0]) * 60
    min_in_min = float(walltime_per_simulation[1])
    sec_in_min = float(walltime_per_simulation[2]) / 60

    cpus = int(specfem_specs["cpus"])

    # Add times to get full simulation time. The 45 min are accounting for
    # everything that is not simulation time.
    # NOTE: total_min is not used by the fixed requests below; it is kept
    # for the parameter-driven CPU request ("walltime": total_min,
    # "cpus": cpus).
    simulation_min = int(round(hours_in_min + min_in_min + sec_in_min))
    total_min = int(1 / math.ceil(float(cpus) / 40) * 10 * simulation_min
                    + 45)

    # Define which resources to get depending on how specfem is run.
    # Both branches bind the same name so that the assignment below works
    # in GPU mode as well (it used to reference the CPU-only variable).
    if specfem_specs["GPU_MODE"] is False:
        res_dict = {
            "resource": "princeton.tiger_cpu",
            "project": "geo",
            "queue": "cpu",
            "schema": "local",
            "walltime": 45,
            "cpus": 20
        }
    else:
        res_dict = {
            "resource": "princeton.tiger_gpu",
            "project": "geo",
            "queue": "gpu",
            "schema": "local",
            "walltime": 300,
            "cpus": cpus,
            "gpus": int(specfem_specs["gpus"])
        }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application
    # Manager. Note: The list order is not guaranteed to be preserved
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
def run_experiment(exp_dir, debug_mode):
    """Build one EnTK pipeline per configured module and run them.

    Args:
        exp_dir: Experiment directory.  It must contain ``config.yml``
            (experiment configuration) and ``resource.yml`` (resource
            configuration).  An ``output`` subdirectory is created in it
            when missing.
        debug_mode: When true, pretty-print every pipeline, stage and task
            and exit with status 0 without creating the App Manager.

    Returns:
        None

    The process exits with status 1 when the experiment configuration, the
    resource configuration or a module's ``pipeline.yml`` does not exist.
    """

    # Initialize a list for pipelines
    pipelines = []

    # Define the configuration and resource file names
    rfile = os.path.join(exp_dir, "resource.yml")
    cfile = os.path.join(exp_dir, "config.yml")

    # Both files must exist (experiment configuration is checked first)
    for required in (cfile, rfile):
        if not os.path.isfile(required):
            print('{} does not exist'.format(required))
            sys.exit(1)

    # Load the resource and experiment configuration files
    with open(rfile, 'r') as fp:
        rcfg = yaml.safe_load(fp)
    with open(cfile, 'r') as fp:
        ecfg = yaml.safe_load(fp)

    # Does the output directory exist? If not, make it
    try:
        os.makedirs(os.path.join(exp_dir, "output"))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Reserved configuration entries
    reserved_econfig_entries = [
        "global-options", "total-options", "extremesealevel-options"
    ]

    # Are there global options? (an empty YAML entry loads as None)
    global_options = ecfg.get("global-options") or {}

    # Loop through the user-requested modules
    for this_mod in ecfg:

        # Skip this entry if it's not associated with SLR projection workflow
        if this_mod in reserved_econfig_entries:
            continue

        mod_cfg = ecfg[this_mod]

        # Load the pipeline configuration file for this module
        pcfg_file = os.path.join(os.path.dirname(__file__), "modules",
                                 mod_cfg['module_set'], mod_cfg['module'],
                                 "pipeline.yml")
        if not os.path.isfile(pcfg_file):
            print('{} does not exist'.format(pcfg_file))
            sys.exit(1)
        with open(pcfg_file, 'r') as fp:
            pcfg = yaml.safe_load(fp)

        # Append the global options to this module; a module without its
        # own "options" entry starts from an empty dict instead of raising
        # KeyError.
        mod_cfg.setdefault("options", {}).update(global_options)

        # Generate a pipeline for this module
        pipe_name = "-".join(
            (this_mod, mod_cfg['module_set'], mod_cfg['module']))
        pipelines.append(GeneratePipeline(pcfg, mod_cfg, pipe_name, exp_dir))

    # Print out PST info if in debug mode
    if debug_mode:
        for p in pipelines:
            print("Pipeline {}:".format(p.name))
            print("################################")
            pprint(p.to_dict())
            for s in p.stages:
                print("Stage {}:".format(s.name))
                print("============================")
                pprint(s.to_dict())
                for t in s.tasks:
                    print("Task {}:".format(t.name))
                    print("----------------------------")
                    pprint(t.to_dict())

        # Exit
        sys.exit(0)

    # Initialize the EnTK App Manager
    amgr = AppManager(hostname=rcfg['rabbitmq']['hostname'],
                      port=rcfg['rabbitmq']['port'],
                      autoterminate=False)

    # Apply the resource configuration provided by the user
    res_desc = {
        'resource': rcfg['resource-desc']['name'],
        'walltime': rcfg['resource-desc']['walltime'],
        'cpus': rcfg['resource-desc']['cpus'],
        'queue': rcfg['resource-desc']['queue'],
        'project': rcfg['resource-desc']['project']
    }
    amgr.resource_desc = res_desc

    # Assign the list of pipelines to the workflow
    amgr.workflow = pipelines

    # Run the workflow.  The manager used to be terminated right after the
    # workflow assignment, so the pipelines were never executed.
    # autoterminate is off, hence the explicit terminate(), which also has
    # to happen when run() raises.
    try:
        amgr.run()
    finally:
        amgr.terminate()

    return None
# self.arguments = ['-c', '24', '-t', '600'] # self.arguments = ['.sh file'] # t.parameters = parameter t.cpu_reqs = { 'processes': 1, 'thread_type': None, 'threads_per_process': 24, 'process_type': None } s.add_tasks(t) p.add_stages(s) pipelines.add(p) # Resource and AppManager amgr = AppManager(hostname='two.radical-project.org', port=33048) amgr.workflow = pipelines amgr.shared_data = [] amgr.resource_desc = { 'resource': 'xsede.comet', 'project': 'unc100', 'queue': 'compute', 'walltime': duration, 'cpus': (len(hparams)**2) * 24, 'access_schema': 'gsissh' } amgr.run()
#'%s/run_openmm.py' % Kconfig.helper_scripts, #'%s/pre_analyze.py' % Kconfig.helper_scripts, '%s/pre_analyze_openmm.py' % Kconfig.helper_scripts, '%s/post_analyze.py' % Kconfig.helper_scripts, #'%s/selection.py' % Kconfig.helper_scripts, '%s/selection-cluster.py' % Kconfig.helper_scripts, '%s/reweighting.py' % Kconfig.helper_scripts ] #if Kconfig.ndx_file is not None: # rman.shared_data.append(Kconfig.ndx_file) # Create Application Manager, only one extasy script on one rabbit-mq server now appman = AppManager(hostname='two.radical-project.org', port=33134) #port=args.port) appman.resource_desc = res_dict # appman = AppManager(port=) # if using docker, specify port here. # Assign resource manager to the Application Manager #appman.resource_manager = rman appman.shared_data = shared_data_all # Assign the workflow as a set of Pipelines to the Application Manager #appman.assign_workflow(wf) appman.workflow = set([wf]) # Run the Application Manager appman.run() except Exception as ex: print 'Error: {0}'.format(str(ex))
def main(cmt_filename):
    '''Build and run the pipeline for one CMT solution.

    The pipeline has three stages: create the database entry, write the
    sources, and run the Specfem simulation on the GPU resource.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      supposed to be inverted for

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>

    '''

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory (sibling "params" of the pipeline directory)
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters: Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)

    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [DB_params["conda-activate"]]   # Conda activate
    t1.executable = [DB_params['bin-python']]     # Python interpreter
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # ---- REQUEST DATA (disabled) ------------------------------------------ #

    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                     "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [DB_params["conda-activate"]]   # Conda activate
    w_sources_t.executable = [DB_params['bin-python']]     # Python interpreter
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ------------------------------------------------------ #

    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/"
        "CompilersAndModules.yml")

    # Load the compiler/module settings
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    # Only the first simulation directory ("CMT") is run.  A slice is used
    # so the loop yields the string "CMT"; iterating attr[0] would walk
    # over its characters and create the tasks run-C, run-M and run-T.
    # NOTE(review): widen to `attr` to run all ten simulations - confirm.
    for at in attr[:1]:
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module loading: get rid of existing modules first
        sf_t.pre_exec = ['module purge']
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to the specfem directory of this simulation
        sf_t.pre_exec.append("cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Resource request for the GPU partition
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application
    # Manager. Note: The list order is not guaranteed to be preserved
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
def run():
    """Run two single-stage test pipelines, each with two xarray tasks.

    All four tasks execute ``xarray_script.py`` with ``python3`` on inputs
    copied from the shared directory, and download the resulting NetCDF
    file under a task-specific name.

    Returns:
        None
    """
    # Initialize the EnTK App Manager
    amgr = AppManager(hostname="localhost", port=5672)

    # Apply the resource configuration
    amgr.resource_desc = {
        'resource': "rutgers.amarel",
        'walltime': 60,
        'cpus': 4,
        'queue': "kopp_1",
        'project': "",
        'schema': "local",
    }

    # Push the input data to the shared directory
    amgr.shared_data = [
        'CMIP6_CanESM5_Omon_piControl_r1i1p1f1_zos_6000-6199.nc',
        'xarray_script.py',
    ]

    # Two pipelines with one stage each
    pipe_a = Pipeline()
    pipe_a.name = "Test-pipeline1"
    pipe_b = Pipeline()
    pipe_b.name = "Test-pipeline2"

    stage_a = Stage()
    stage_a.name = "Test-stage1"
    stage_b = Stage()
    stage_b.name = "Test-stage2"

    # Template task; the other three are deep copies of it.
    # NOTE(review): the first pip3 command names no package - confirm intent.
    base_task = Task()
    base_task.name = "Test-task1"
    base_task.pre_exec = ['pip3 install --upgrade; pip3 install pandas zarr cftime toolz "dask[complete]" bottleneck xarray']
    base_task.executable = 'python3'
    base_task.arguments = ['xarray_script.py']
    base_task.copy_input_data = [
        "$SHARED/CMIP6_CanESM5_Omon_piControl_r1i1p1f1_zos_6000-6199.nc",
        "$SHARED/xarray_script.py",
    ]
    base_task.download_output_data = ["test_netcdf_file.nc > test_netcdf_file1.nc"]

    clone_specs = (
        ("Test-task2", "test_netcdf_file.nc > test_netcdf_file2.nc"),
        ("Test-task3", "test_netcdf_file.nc > test_netcdf_file3.nc"),
        ("Test-task4", "test_netcdf_file.nc > test_netcdf_file4.nc"),
    )
    clones = []
    for clone_name, download_spec in clone_specs:
        clone = copy.deepcopy(base_task)
        clone.name = clone_name
        clone.download_output_data = [download_spec]
        clones.append(clone)

    # Assign tasks and stages to the pipelines
    stage_a.add_tasks(base_task)
    stage_a.add_tasks(clones[0])
    pipe_a.add_stages(stage_a)

    stage_b.add_tasks(clones[1])
    stage_b.add_tasks(clones[2])
    pipe_b.add_stages(stage_b)

    # Assign the pipelines to the workflow and run
    amgr.workflow = [pipe_a, pipe_b]
    amgr.run()

    # Done
    return None
def test_write_session_description():
    """Check the session description JSON written for a two-pipeline workflow.

    An AppManager is given two generated pipelines; its WFprocessor and
    TaskManager are instantiated by hand.  ``write_session_description`` is
    then called and the JSON file it leaves under the session directory is
    compared with the expected entity state models and uid tree.

    The session directory is removed in a ``finally`` clause so that a failed
    assertion (or any earlier error) does not leave it behind.
    """

    def _uids(etype, first, last):
        # Zero-padded uids '<etype>.NNNN' for first..last (inclusive).
        return ['%s.%04d' % (etype, idx) for idx in range(first, last + 1)]

    def _node(uid, children=(), has=()):
        # Expected tree entry for `uid`; the etype is the uid's prefix.
        return {
            'cfg': {},
            'children': list(children),
            'etype': uid.split('.')[0],
            'has': list(has),
            'uid': uid,
        }

    expected_entities = {
        'appmanager': {
            'event_model': {},
            'state_model': None,
            'state_values': None
        },
        'pipeline': {
            'event_model': {},
            'state_model': {
                'CANCELED': 9,
                'DESCRIBED': 1,
                'DONE': 9,
                'FAILED': 9,
                'SCHEDULING': 2
            },
            'state_values': {
                '1': 'DESCRIBED',
                '2': 'SCHEDULING',
                '9': ['DONE', 'CANCELED', 'FAILED']
            }
        },
        'stage': {
            'event_model': {},
            'state_model': {
                'CANCELED': 9,
                'DESCRIBED': 1,
                'DONE': 9,
                'FAILED': 9,
                'SCHEDULED': 3,
                'SCHEDULING': 2
            },
            'state_values': {
                '1': 'DESCRIBED',
                '2': 'SCHEDULING',
                '3': 'SCHEDULED',
                '9': ['FAILED', 'CANCELED', 'DONE']
            }
        },
        'task': {
            'event_model': {},
            'state_model': {
                'CANCELED': 9,
                'DEQUEUED': 8,
                'DEQUEUEING': 7,
                'DESCRIBED': 1,
                'DONE': 9,
                'EXECUTED': 6,
                'FAILED': 9,
                'SCHEDULED': 3,
                'SCHEDULING': 2,
                'SUBMITTED': 5,
                'SUBMITTING': 4
            },
            'state_values': {
                '1': 'DESCRIBED',
                '2': 'SCHEDULING',
                '3': 'SCHEDULED',
                '4': 'SUBMITTING',
                '5': 'SUBMITTED',
                '6': 'EXECUTED',
                '7': 'DEQUEUEING',
                '8': 'DEQUEUED',
                '9': ['DONE', 'CANCELED', 'FAILED']
            }
        }
    }

    # 22 tasks overall: per pipeline one single-task stage followed by one
    # ten-task stage.
    task_uids = _uids('task', 0, 21)
    stage_members = {
        'stage.0000': task_uids[0:1],
        'stage.0001': task_uids[1:11],
        'stage.0002': task_uids[11:12],
        'stage.0003': task_uids[12:22],
    }

    expected_tree = {
        'appmanager.0000': _node(
            'appmanager.0000',
            children=['wfprocessor.0000', 'resource_manager.0000',
                      'task_manager.0000', 'pipeline.0000', 'pipeline.0001'],
            has=['pipeline', 'wfprocessor', 'resource_manager',
                 'task_manager']),
        'pipeline.0000': _node('pipeline.0000',
                               children=['stage.0000', 'stage.0001'],
                               has=['stage']),
        'pipeline.0001': _node('pipeline.0001',
                               children=['stage.0002', 'stage.0003'],
                               has=['stage']),
        'resource_manager.0000': _node('resource_manager.0000'),
        'task_manager.0000': _node('task_manager.0000'),
        'wfprocessor.0000': _node('wfprocessor.0000'),
    }
    for stage_uid, members in stage_members.items():
        expected_tree[stage_uid] = _node(stage_uid, children=members,
                                         has=['task'])
    for task_uid in task_uids:
        expected_tree[task_uid] = _node(task_uid)

    expected = {
        'config': {},
        'entities': expected_entities,
        'tree': expected_tree,
    }

    hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
    port = int(os.environ.get('RMQ_PORT', 5672))

    amgr = AppManager(hostname=hostname, port=port)

    try:
        amgr.resource_desc = {
            'resource': 'xsede.stampede',
            'walltime': 60,
            'cpus': 128,
            'gpus': 64,
            'project': 'xyz',
            'queue': 'high'
        }

        workflow = [generate_pipeline(1), generate_pipeline(2)]
        amgr.workflow = workflow

        amgr._wfp = WFprocessor(sid=amgr._sid,
                                workflow=amgr._workflow,
                                pending_queue=amgr._pending_queue,
                                completed_queue=amgr._completed_queue,
                                mq_hostname=amgr._mq_hostname,
                                port=amgr._port,
                                resubmit_failed=amgr._resubmit_failed)
        amgr._wfp._initialize_workflow()
        amgr._workflow = amgr._wfp.workflow

        amgr._task_manager = TaskManager(sid=amgr._sid,
                                         pending_queue=amgr._pending_queue,
                                         completed_queue=amgr._completed_queue,
                                         mq_hostname=amgr._mq_hostname,
                                         rmgr=amgr._resource_manager,
                                         port=amgr._port)

        # NOTE(review): no explicit mkdir of the session directory is done
        # here; presumably it already exists at this point -- confirm.
        write_session_description(amgr)

        desc = ru.read_json('%s/radical.entk.%s.json' % (amgr._sid, amgr._sid))

        assert desc == expected

    finally:
        # Clean up even on failure; the isdir guard keeps a missing directory
        # from masking the original error.
        if os.path.isdir(amgr._sid):
            shutil.rmtree(amgr._sid)
def main():
    """Parse the command line and launch the icemodel ensemble.

    Connection settings are read from the ``RMQ_HOSTNAME``, ``RMQ_PORT``,
    ``RMQ_USERNAME`` and ``RMQ_PASSWORD`` environment variables.  When
    ``RADICAL_ENTK_VERBOSE`` is unset, ``RADICAL_ENTK_REPORT`` is set to
    ``"True"``.

    Command-line arguments:
        executable:     location of the model executable
        clifvmax_step:  step of the CLIFVMAX parameter range
        crevliq_step:   step of the CREVLIQ parameter range
        param_space:    number of values per parameter (the space is
                        ``param_space`` x ``param_space``)
        resource_reqs:  JSON file holding the resource description

    One pipeline with a single stage is created; ``add_tasks`` populates the
    stage from the two parameter ranges and the executable.

    Raises:
        ValueError: if ``RMQ_PORT`` is set to a non-integer value.
    """

    # Get/Set radical configuration attributes
    if os.environ.get("RADICAL_ENTK_VERBOSE") is None:
        os.environ["RADICAL_ENTK_REPORT"] = "True"

    hostname = os.environ.get("RMQ_HOSTNAME", "localhost")
    # Environment values are strings: convert so that the port is an int
    # whether it comes from the environment or from the default.
    port = int(os.environ.get("RMQ_PORT", 5672))
    username = os.environ.get("RMQ_USERNAME")
    password = os.environ.get("RMQ_PASSWORD")

    # Input argument parsing
    parser = argparse.ArgumentParser("Launch icemodel ensemble")
    parser.add_argument("executable",
                        type=str,
                        help="location of the model executable")
    parser.add_argument("clifvmax_step",
                        type=int,
                        help="CLIFVMAX paramater range step value")
    parser.add_argument("crevliq_step",
                        type=int,
                        help="CREVLIQ paramater range step value")
    parser.add_argument(
        "param_space",
        type=int,
        help="The size of the 2d parameter space (e.g. <param_space>X<param_space))",
    )
    parser.add_argument(
        "resource_reqs",
        type=argparse.FileType("r"),
        help="Resource requirements json",
    )
    args = parser.parse_args()

    # Create iterable objects for the range of values possible for CLIFVMAX
    # and CREVLIQ: `param_space` values each, spaced by the respective step
    clifvmax_range = range(0, args.clifvmax_step * args.param_space,
                           args.clifvmax_step)
    crevliq_range = range(0, args.crevliq_step * args.param_space,
                          args.crevliq_step)

    # Setting up the pipeline
    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Add tasks to stage
    add_tasks(crevliq_range, clifvmax_range, s1, args.executable)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create Application Manager
    appman = AppManager(hostname=hostname,
                        port=port,
                        username=username,
                        password=password)

    # Assign the workflow as a set or list of Pipelines to the Application
    # Manager
    appman.workflow = {p}

    # Load the resource requirements file; argparse opened it, so close it
    # here once it has been read
    with args.resource_reqs as reqs_file:
        res_dict = json.load(reqs_file)

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Run the Application Manager
    appman.run()
# resource is 'local.localhost' to execute locally res_dict = { 'resource': 'ornl.summit', 'queue': queue, 'schema': 'local', 'walltime': 60 * hrs_wt, 'cpus': N_jobs_MD * 7, 'gpus': N_jobs_MD, #6*2 , 'project': proj_id } # Create Application Manager # appman = AppManager() appman = AppManager(hostname=os.environ.get('RMQ_HOSTNAME'), port=int(os.environ.get('RMQ_PORT'))) appman.resource_desc = res_dict p1 = generate_training_pipeline() # p2 = generate_MDML_pipeline() pipelines = [] pipelines.append(p1) # pipelines.append(p2) # Assign the workflow as a list of Pipelines to the Application Manager. In # this way, all the pipelines in the list will execute concurrently. appman.workflow = pipelines # Run the Application Manager appman.run()
raise ValueError( "Invalid RMQ environment. Please see README.md for configuring environment." ) # Calculate total number of nodes required. Assumes 1 MD job per GPU # TODO: fix this assumption for NAMD num_full_nodes, extra_gpus = divmod(cfg.molecular_dynamics_stage.num_tasks, cfg.gpus_per_node) extra_node = int(extra_gpus > 0) num_nodes = max(1, num_full_nodes + extra_node) appman.resource_desc = { "resource": cfg.resource, "queue": cfg.queue, "schema": cfg.schema_, "walltime": cfg.walltime_min, "project": cfg.project, "cpus": cfg.cpus_per_node * cfg.hardware_threads_per_cpu * num_nodes, "gpus": cfg.gpus_per_node * num_nodes, } pipeline_manager = PipelineManager(cfg) # Back up configuration file (PipelineManager must create cfg.experiment_dir) shutil.copy(args.config, cfg.experiment_directory) pipelines = pipeline_manager.generate_pipelines() # Assign the workflow as a list of Pipelines to the Application Manager. # All the pipelines in the list will execute concurrently. appman.workflow = pipelines # Run the Application Manager
res_dict = { 'resource': 'xsede.supermic', 'walltime': 30, 'cpus': total_cores, 'project': 'TG-MCB090174', 'access_schema': 'gsissh' } # Download analysis file from MobleyLab repo os.system( 'curl -O https://raw.githubusercontent.com/MobleyLab/alchemical-analysis/master/alchemical_analysis/alchemical_analysis.py' ) # Create Application Manager amgr = AppManager(port=33231, hostname='two.radical-project.org') amgr.resource_desc = res_dict # Assign resource manager to the Application Manager amgr.shared_data = [ './CB7G3.gro', './CB7G3.ndx', './CB7G3.top', './CB7G3_template.mdp', './analysis_1.py', './analysis_2.py', './determine_convergence.py', './alchemical_analysis.py', './3atomtypes.itp', './3_GMX.itp', './cucurbit_7_uril_GMX.itp' ] # Assign the workflow as a set of Pipelines to the Application Manager amgr.workflow = pipelines # Run the Application Manager amgr.run()