def make_md_workflow(sim, archive, stages, md_engine='gromacs',
                     md_category='md', local_category='local',
                     postrun_wf=None, post_wf=None, files=None):
    """Construct a general, single MD simulation workflow.

    Assumptions
    -----------
    Queue launcher submission script must define and export the following
    environment variables:

        1. STAGING : absolute path on resource to staging directory
        2. SCRATCHDIR : absolute path on resource to scratch directory

    The staging directory must already exist on all resources specified in
    ``stages``.

    The script ``run_md.sh`` must be somewhere on your path, and must take
    a single argument giving the directory to execute MD out of. It should
    create and change the working directory to that directory before anything
    else.

    Parameters
    ----------
    sim : str
        MDSynthesis Sim.
    archive : str
        Absolute path to directory to launch from, which holds all required
        files for running MD.
    stages : list, str
        Dicts giving for each of the following keys:
            - 'server': server host to transfer to
            - 'user': username to authenticate with
            - 'staging': absolute path to staging area on remote resource
        alternatively, a path to a yaml file giving a list of dictionaries
        with the same information.
    md_engine : {'gromacs'}
        MD engine name; needed to determine continuation mechanism to use.
    md_category : str
        Category to use for the MD Firework. Used to target to correct
        rockets.
    local_category : str
        Category to use for non-MD Fireworks, which should be run by rockets
        where the ``archive`` directory is accessible.
    postrun_wf : Workflow
        Workflow to perform after each copyback; performed in parallel to
        continuation run.
    post_wf : Workflow
        Workflow to perform after completed MD (no continuation); use for
        final postprocessing.
    files : list
        Names of files (not paths) needed for each leg of the simulation.
        Need not exist, but if they do they will get staged before each run.

    Returns
    -------
    workflow
        MD workflow; can be submitted to LaunchPad of choice.

    """
    sim = mds.Sim(sim)

    #TODO: perhaps move to its own FireTask?
    sim.categories['md_status'] = 'running'

    # treat the default of ``files=None`` as "no files" so the
    # comprehensions below don't raise TypeError
    if files is None:
        files = []

    if isinstance(stages, string_types):
        with open(stages, 'r') as f:
            # safe_load: stage definitions are plain mappings, and
            # yaml.load without an explicit Loader is unsafe/deprecated
            stages = yaml.safe_load(f)

    ## Stage files on all resources where MD may run; takes place locally
    fts_stage = list()
    for stage in stages:
        fts_stage.append(
                FileTransferTask(mode='rtransfer',
                                 server=stage['server'],
                                 user=stage['user'],
                                 files=[os.path.join(archive, i) for i in files],
                                 dest=os.path.join(stage['staging'], sim.uuid),
                                 max_retry=5,
                                 shell_interpret=True))

    fw_stage = Firework(fts_stage,
                        spec={'_launch_dir': archive,
                              '_category': local_category},
                        name='staging')

    ## MD execution; takes place in queue context of compute resource

    # make rundir
    ft_mkdir = MkRunDirTask(uuid=sim.uuid)

    # copy input files from staging to scratch space; missing files are
    # tolerated, since they may be produced only by a previous leg
    ft_copy = FileTransferTask(
            mode='copy',
            files=[os.path.join('${STAGING}/', sim.uuid, i) for i in files],
            dest=os.path.join('${SCRATCHDIR}/', sim.uuid),
            ignore_missing=True,
            shell_interpret=True)

    # next, run MD out of the scratch directory
    ft_md = ScriptTask(script='run_md.sh {}'.format(
                               os.path.join('${SCRATCHDIR}/', sim.uuid)),
                       use_shell=True,
                       fizzle_bad_rc=True)

    # send info on where files live to pull firework
    ft_info = BeaconTask(uuid=sim.uuid)

    fw_md = Firework([ft_mkdir, ft_copy, ft_md, ft_info],
                     spec={'_category': md_category},
                     name='md')

    ## Pull files back to archive; takes place locally
    ft_copyback = FilePullTask(dest=archive)

    fw_copyback = Firework([ft_copyback],
                           spec={'_launch_dir': archive,
                                 '_category': local_category},
                           name='pull')

    ## Decide if we need to continue and submit new workflow if so; takes
    ## place locally
    if md_engine == 'gromacs':
        ft_continue = GromacsContinueTask(sim=sim,
                                          archive=archive,
                                          stages=stages,
                                          md_engine=md_engine,
                                          md_category=md_category,
                                          local_category=local_category,
                                          postrun_wf=postrun_wf,
                                          post_wf=post_wf,
                                          files=files)
    else:
        raise ValueError("No known md engine `{}`.".format(md_engine))

    fw_continue = Firework([ft_continue],
                           spec={'_launch_dir': archive,
                                 '_category': local_category},
                           name='continue')

    wf = Workflow([fw_stage, fw_md, fw_copyback, fw_continue],
                  links_dict={fw_stage: [fw_md],
                              fw_md: [fw_copyback],
                              fw_copyback: [fw_continue]},
                  name='{} | md'.format(sim.name),
                  metadata=dict(sim.categories))

    ## Mix in postrun workflow, if given; runs in parallel to continuation
    if postrun_wf:
        if isinstance(postrun_wf, dict):
            postrun_wf = Workflow.from_dict(postrun_wf)

        wf.append_wf(Workflow.from_wflow(postrun_wf), [fw_copyback.fw_id])

    return wf
def run_task(self, fw_spec): import gromacs from ..general import make_md_workflow # bit of an ad-hoc way to grab the checkpoint file cpt = [ f for f in self['files'] if (('cpt' in f) and ('prev' not in f)) ] if len(cpt) > 1: raise ValueError("Multiple CPT files in 'files'; include " "only one.") elif len(cpt) < 1: raise ValueError("No CPT file in 'files'; " "cannot do continue check.") else: cpt = os.path.join(self['archive'], cpt[0]) # bit of an ad-hoc way to grab the tpr file tpr = [f for f in self['files'] if ('tpr' in f)] if len(tpr) > 1: raise ValueError("Multiple TPR files in 'files'; include " "only one.") elif len(tpr) < 1: raise ValueError("No TPR file in 'files'; " "cannot do continue check.") else: tpr = os.path.join(self['archive'], tpr[0]) # let's extract the current frame and place it in the archive, since # this is useful for starting runs up at any point from the current end gromacs.trjconv(f=cpt, s=tpr, o=os.path.join( self['archive'], '{}.gro'.format( os.path.splitext(os.path.basename(tpr))[0])), input=('0', )) # extract step number from CPT file out = gromacs.dump(cp=cpt, stdout=False) step = int([ line.split(' ')[-1] for line in out[1].split('\n') if 'step = ' in line ][0]) # extract nsteps from TPR file out = gromacs.dump(s=tpr, stdout=False) nsteps = int([ line.split(' ')[-1] for line in out[1].split('\n') if 'nsteps' in line ][0]) # if step < nsteps, we submit a new workflow if step < nsteps: wf = make_md_workflow(sim=self['sim'], archive=self['archive'], stages=self['stages'], files=self['files'], md_engine=self['md_engine'], md_category=self['md_category'], local_category=self['local_category'], postrun_wf=self['postrun_wf'], post_wf=self['post_wf']) return FWAction(additions=[wf]) else: sim = mds.Sim(self['sim']) sim.categories['md_status'] = 'finished' # if given, we submit the post workflow post_wf = self.get('post_wf') if post_wf: if isinstance(post_wf, dict): post_wf = Workflow.from_dict(post_wf) # this makes a fresh copy without 
already-used fw_ids post_wf = Workflow.from_wflow(post_wf) return FWAction(additions=[post_wf])